In my initial analysis of the freshwater-saltwater dataset, I had three vcf files: 1. one generated from all pairwise comparisons of populations, containing SNPs only found in all 16 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“separate”) 2. one generated from all pairwise comparisons of populations, containing SNPs found in 4 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“P4”) 3. one generated from comparing lumped ‘freshwater’ and ‘saltwater’ populations, containing SNPs found in 50% of individuals and with a minor allele frequency of at least 5%. (“lumped”)
I did the majority of the analyses using set #3, but would like to explore what changes if I use dataset #2. I think #1 is too restrictive. Dataset #2 is in the “subset” dataset, and is what I ran the structure analyses on. This is the dataset I’m going to move forward with for the paper.
Note that in most of these cases the actual analysis will be set to eval=FALSE once I’ve run it once, because then I save the output and only have to read it in, saving compilation time.
source("../../gwscaR/R/gwscaR.R")
source("../../gwscaR/R/gwscaR_plot.R")
source("../../gwscaR/R/gwscaR_utility.R")
source("../../gwscaR/R/gwscaR_fsts.R")
source("../../gwscaR/R/gwscaR_popgen.R")
source("../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
library(knitr)
pop.list<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
"FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLLG")
pop.labs<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
"FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLFW")
fw.list<-c("TXFW","LAFW","ALFW","FLLG")
sw.list<-c("TXSP","TXCC","TXCB","ALST","FLSG","FLKB",
"FLFD","FLSI","FLAB","FLPB","FLHB","FLCC")
lgs<-c("LG1","LG2","LG3","LG4","LG5","LG6","LG7","LG8","LG9","LG10","LG11",
"LG12","LG13","LG14","LG15","LG16","LG17","LG18","LG19","LG20","LG21",
"LG22")
lgn<-seq(1,22)
all.colors<-c(rep("black",2),"#2166ac","black","#2166ac","black","#2166ac",
rep("black",8),"#2166ac")
grp.colors<-c('#762a83','#af8dc3','#e7d4e8','#d9f0d3','#7fbf7b','#1b7837')
pwise.fst.all<-read.table("stacks/fwsw_fst_summary.txt",header=T,row.names=1,sep='\t')
#pwise.fst.all<-rbind(pwise.fst.all,rep(NA,ncol(pwise.fst.all)))
rownames(pwise.fst.all)<-pop.labs
colnames(pwise.fst.all)<-pop.labs
pwise.fst.sub<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')
colnames(pwise.fst.sub)<-pop.labs
rownames(pwise.fst.sub)<-pop.labs
ped.sub<-read.table("stacks/subset.ped",header=F)
ped.sub$V1<-gsub("sample_(\\w{4})\\w+.*","\\1",ped.sub$V2)
map.sub<-read.table("stacks/subset.map",header = F,stringsAsFactors = F)
map.sub$Locus<-paste(gsub("(\\d+)_\\d+","\\1",map.sub$V2),(as.numeric(map.sub$V4)+1),sep=".")
colnames(ped.sub)<-c("Pop","IID","","","","Phenotype","","",map.sub$Locus)
This P4/subset dataset has 9820 SNPs from 9820 RAD loci, from 698 indivudals in 16 populations.
The first thing to do is to create a vcf file using the subset parameters. I’ve already got a whitelist of loci in the subsetted dataset, so I need to run populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --vcf, which I did on 2017-12-18 on silivren-lond. I then re-named it to p4.vcf (and the other output files).
vcf<-parse.vcf("p4.upd.vcf") #this is the smaller dataset
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")
The vcf file contains 9638 SNPs from 9638 RAD loci.
Choose a subset of the SNPs to re-use. [Do I need to do this?]
chosen.snps<-choose.one.snp(vcf)$SNP
write.table(chosen.snps,"chosen.all.snps.txt",quote=F)
chosen.snps<-unlist(read.table("chosen.all.snps.txt"))
There are 2546 SNPs that I’ll use from the vcf file, from 9638 RAD loci.
The first figure in the paper is a map of the collection sites.
jpeg("all_sites_map.jpg", res=300, height=7,width=14, units="in")
pdf("all_sites_map.pdf",height=7,width=14)
par(oma=c(0,0,0,0),mar=c(0,0,0,0),pin=c(7,7))
map("worldHires", "usa",xlim=c(-100,-76), ylim=c(24,32),
col="gray90", mar=c(0,0,0,0),fill=TRUE, res=300,myborder=0)
map("worldHires", "mexico",xlim=c(-100,-76), ylim=c(24,32),
col="gray95", fill=TRUE, add=TRUE)
points(mar.coor$lon, mar.coor$lat, col="black", cex=1.2, pch=19)
points(-1*fw.coor$lon, fw.coor$lat, col="#2166ac", cex=1.5, pch=18)
abline(h=c(25,30,35),lty=3)
abline(v=c(-80,-85,-90,-95,-100),lty=3)
text(x=c(-99.5,-99.5),y=c(25,30),c("25N","30N"))
text(x=c(-80,-85,-90,-95),y=rep(31.8,4),c("80W","85W","90W","95W"))
text(y=26,x=-90,"Gulf of Mexico")
text(y=25.5,x=-98.5,"Mexico")
text(x=-91,y=31,"USA")
text(x=-78,y=29.5,"Atlantic Ocean")
text(x=-96.5,y=26,"TXSP",font=2)
text(x=-96.9,y=27.2,"TXCC",font=2)
text(x=-96,y=28.3,"TXFW",font=2,col="#2166ac")
text(x=-94.7,y=29,"TXCB",font=2)
text(x=-90.2,y=30.3,"LAFW",font=2,col="#2166ac")
text(x=-88,y=30,"ALST",font=2)
text(x=-87,y=30.75,"ALFW",font=2,col="#2166ac")
text(x=-85,y=29.4,"FLSG",font=2)
text(x=-83.5,y=29.2,"FLKB",font=2)
text(x=-83.2,y=27.6,"FLFD",font=2)
text(x=-82.2,y=26,"FLSI",font=2)
text(x=-80,y=24.8,"FLAB",font=2)
text(x=-79.5,y=26.8,"FLPB",font=2)
text(x=-79.7,y=27.2,"FLHB",font=2)
text(x=-80.2,y=28.2,"FLCC",font=2)
text(x=-80.9,y=29.3,"FLFW",font=2,col="#2166ac")
dev.off()
Figure 1. Map of collection sites
The second figure in the paper is showing population structure, using STRUCTURE, adegenet, and PCAdapt. These analyses were run as exactly written in fwsw_analysis.R, so I won’t reproduce that code here.
Figure 2. Population Structure
I don’t need to re-calculate pairwise Jost’s D, and Fsts using the P4 (or “subset”) dataset, so I can just read in those files. But I do need to run Treemix and PopTree2.
To run treemix, I follow the following steps:
-m.All of these require setting a root, which is FLPB based on previous trees.
First, I need to create a file in the correct format, which uses the vcf file:
treemix.name<-"treemix/p4_treemix"
treemix.prefix<-"treemix/p4_"
poporder.file<-"treemix/poporder"
fst.tree.name<-as.character("ALLfst_cov_heatmap.png")
tm.fwsw<-treemix.from.vcf(vcf,pop.list)
write.table(tm.fwsw,treemix.name,col.names=TRUE,row.names=FALSE,quote=F,sep=' ')
#then in unix: gzip -c treemix.name > treemix.name.gz
Then, in unix, I need to run gzip -c treemix/p4_treemix > treemix/p4_treemix.gz. Now I can run scripts/run_treemix.sh, which implements steps 1 and 2, and which I need to run in Ubuntu. Note that there are a lot of “no counts” warnings from treemix. Also, that it runs very quickly
After that, I can evaluate the different outcomes.
setwd("treemix")
source("../../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
poporder<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST",
"ALFW","FLSG","FLKB","FLFD","FLSI","FLAB",
"FLPB","FLHB","FLCC","FLLG")
colors<-poporder
colors[colors %in% "FLLG"]<-grp.colors[6]
colors[colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
colors[colors %in% c("FLAB")]<-grp.colors[5]
colors[colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
colors[colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
colors[colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]
write.table(cbind(poporder,colors),"poporder",quote=F,sep='\t')
setwd("../")
m0<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder,split=c(1,1,3,2),more=TRUE)
m1<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder,split=c(2,1,3,2),more=TRUE)
m2<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder,split=c(3,1,3,2),more=TRUE)
m3<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder,split=c(1,2,3,2),more=TRUE)
m4<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder,split=c(2,2,3,2),more=TRUE)
m5<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder,split=c(3,2,3,2),more=FALSE)
par(mfrow=c(2,3))
r0<-plot_resid(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder.file)
r1<-plot_resid(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder.file)
r2<-plot_resid(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder.file)
r3<-plot_resid(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder.file)
r4<-plot_resid(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder.file)
r5<-plot_resid(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder.file)
Population pairs that are ‘too far apart’ on the tree (have high error estimates) are ones that are likely candidates for gene flow - and these are the squares with dark greens, blues, and black. These are LAFW-TXFW and ALFW-TXFW in almost all of the SE graphs
par(mfrow=c(2,3),mar=c(1,1,1,1),oma=c(1,1,1,1))
t0<-plot_tree(paste(treemix.prefix,"k100bFLPBr",sep=""),plotmig = F,plus=0.05,scale=F,mbar=F)
t1<-plot_tree(paste(treemix.prefix,"k100bFLPBrm1",sep=""),plus=0.05,scale=F,mbar=F)
t2<-plot_tree(paste(treemix.prefix,"k100bFLPBrm2",sep=""),plus=0.05,scale=F,mbar=F)
t3<-plot_tree(paste(treemix.prefix,"k100bFLPBrm3",sep=""),plus=0.05,scale=F,mbar=F)
t4<-plot_tree(paste(treemix.prefix,"k100bFLPBrm4",sep=""),plus=0.05,scale=F,mbar=F)
t5<-plot_tree(paste(treemix.prefix,"k100bFLPBrm5",sep=""),plus=0.05,scale=F,mbar=F)
Look at the p-values: do migration events always improve the fit of the data?
tree0<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBr.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "")
tree1<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm1.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "",skip=1)
tree2<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm2.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "",skip=1)
tree3<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm3.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "",skip=1)
tree4<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm4.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "",skip=1)
tree5<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm5.treeout.gz",sep="")), as.is = T, comment.char = "", quote = "",skip=1)
tree1[,4]
## [1] 9.18188e-12
tree2[,4]
## [1] "6.61839e-05" "<2.22507e-308"
tree3[,4]
## [1] "1.02604e-05" "<2.22507e-308" "1.88599e-10"
tree4[,4]
## [1] "<2.22507e-308" "<2.22507e-308" "<2.22507e-308" "2.89325e-05"
tree5[,4]
## [1] "6.55032e-15" "4.94959e-09" "<2.22507e-308" "0.00690872"
## [5] "<2.22507e-308"
All of the added migration edges improve the fit. Evaluating both the residual and migration edge plots, we can see that four migration edges reduces the error between LAFW & TXFW and ALFW & TXFW. The largest SE is between FLFW (FLLG) and itself, and secondarily between LAFW and itself. This means that those branches are surprisingly long, which I am comfortable accepting. The strongest migration edge, between the branch from the Atlantic Florida to Gulf Florida populations -> FLAB, is unsurprising because that is an intermediate location between the Atlantic and Gulf coasts. In some of the structure analyses it clusters with the Gulf coast and in others it is its own admixed group - supporting the tree structure here.
The other migration events are somewhat more surprising. We will evaluate those using the three and four population analyses.
First, let’s read in the three and four population analysis results. These tested all possible three- and four-population tree groups to see whether they pass a test of ‘treeness’. If not, their p-value will be small, and they don’t form a nice tree.
f3.name<-"treemix/p4_threepop.txt"
f4.name<-"treemix/p4_fourpop.txt"
#extract the relevant lines
extract.fs<-function(filename,pat="^[A-Z]{4};"){
f<-readLines(filename)
fmatch<-f[grep(pat,f)]
f<-read.table(text=fmatch)
colnames(f)<-c("pops","f","SE","Z")
return(f)
}
f3<-extract.fs(f3.name)
f4<-extract.fs(f4.name,pat="^[A-Z]{4},")
#add p-values
f3$p<-pnorm(f3$Z)
f4$p<-pnorm(f4$Z)
Now we can investigate the migration edges added to the trees.
This least surprising migration edge would be expected to show a signature of admixture. I’ll check the treeness of FLHB;FLSI,FLAB and FLHB,FLCC;FLSI,FLAB. For comparison, I can look at FLHB;FLSI,FLFD and FLHB,FLCC;FLSI,FLFD
checks3<-c("FLHB;FLSI,FLAB","FLHB;FLFD,FLSI")
f3[f3$pops %in% checks3,]
## pops f SE Z p
## 1583 FLHB;FLFD,FLSI 0.0250745 0.000545858 45.9359 1
## 1625 FLHB;FLSI,FLAB 0.0224990 0.000508579 44.2390 1
checks4<-c("FLSI,FLAB;FLHB,FLCC","FLFD,FLSI;FLHB,FLCC")
f4[f4$pops %in% checks4,]
## pops f SE Z p
## 5379 FLFD,FLSI;FLHB,FLCC -0.000159989 6.74083e-05 -2.37343 8.811867e-03
## 5425 FLSI,FLAB;FLHB,FLCC -0.000470815 8.68360e-05 -5.42188 2.948773e-08
They pass the three-population tests but not the four-population tests.
To investigate this migration edge, I need to evaluate whether a [TXFW[TXCB,TXCC]] or [TXFW[TXCB,TXSP]] topology is an appropriate tree.
checks<-c("TXFW;TXCC,TXCB",
"TXFW;TXSP,TXCB")
f3[f3$pops %in% checks,]
## pops f SE Z p
## 44 TXFW;TXSP,TXCB 0.0294602 0.00227983 12.9221 1
## 318 TXFW;TXCC,TXCB 0.0304362 0.00230450 13.2073 1
Neither of these fail the treeness test, so TXFW is essentially functioning as an effective outgroup/more ancestral population. Let’s check if a four population tree also makes sense.
f4[f4$pops %in% "TXSP,TXCC;TXFW,TXCB",]
## pops f SE Z p
## 2 TXSP,TXCC;TXFW,TXCB 0.000976003 0.000160092 6.09653 1
This also passes the treeness test - so this isn’t an actual admixture event, more of demonstrating shared ancestry.
This migration edge would suggest a three-population structure of [TXFW[ALFW,LAFW]] or a four-population structure of [TXFW,TXCB[LAFW,ALFW]] or [TXFW,ALST[LAFW,ALFW]]
checks<-c("TXFW,TXCB;LAFW,ALFW",
"TXFW,ALST;LAFW,ALFW")#to get the correct order of populations I had to
#use grep and manually try a few different ones
f4[f4$pops %in% checks,]
## pops f SE Z p
## 2463 TXFW,TXCB;LAFW,ALFW -0.00140534 0.000542958 -2.58830 4.822547e-03
## 2657 TXFW,ALST;LAFW,ALFW -0.00416643 0.000526403 -7.91491 1.237160e-15
f3[f3$pops == "TXFW;LAFW,ALFW",]
## pops f SE Z p
## 630 TXFW;LAFW,ALFW 0.0349272 0.00105283 33.1745 1
The three-population test does not fail the treeness test but the four-population tests do fail the treeness tests. This suggests that TXFW functions as an outgroup to the LAFW and ALFW - they are perhaps derived from the TXFW population rather than experiencing current admixture.
FLLG is actually FLFW, and there’s a migration edge from it to the Alabama/Louisiana clade and the Texas clade. So, we can test FLLG;ALST,TXFW, FLLG;ALFW,TXCB, FLLG;ALFW,TXFW or FLLG;ALST,TXCC - see if the migration is specifically to freshwater populations or not.
checks<-c("FLLG;TXFW,ALST","FLLG;TXCB,ALFW","FLLG;TXCB,LAFW",#mix of freshwater
"FLLG;TXFW,ALFW","FLLG;TXFW,LAFW", #all freshwater
"FLLG;TXCC,ALST") #no freshwater
f3[f3$pops %in% checks,]
## pops f SE Z p
## 452 FLLG;TXCC,ALST 0.0843051 0.00214158 39.3658 1
## 655 FLLG;TXFW,LAFW 0.0897111 0.00256943 34.9147 1
## 686 FLLG;TXFW,ALST 0.0833365 0.00215944 38.5917 1
## 713 FLLG;TXFW,ALFW 0.0928283 0.00279453 33.2179 1
## 853 FLLG;TXCB,LAFW 0.0857054 0.00220127 38.9345 1
## 911 FLLG;TXCB,ALFW 0.0874172 0.00219662 39.7963 1
These all pass the treeness test.
treemix.file<-as.character("treemix/p4_k100bFLPBr.cov.gz")
tm.vertices<-"treemix/p4_k100bFLPBrm4.vertices.gz"
tm.plot<-"treemix/p4_treemix_m4_FLPB.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"
For the PopTree2 analysis, I need to convert the vcf file to genepop format. I did this using PGDSpider2. Then I ran PopTree2 on Windows10, using Da to calculate neighbor-joining trees and using 1000 bootstrap replicates.
Or, if that doesn’t work,
remove.missing.data<-function(vcf, pop.list){
exclude<-NULL
for(i in 1:nrow(vcf))
{
vcf.row<-vcf[i,colnames(vcf) != "SNP"]#remove this if it exists
missingness<-unlist(lapply(pop.list,function(pop){
pop.vcf<-vcf.row[,grep(pop,colnames(vcf.row))]
missing<-length(grep("\\.\\/\\.",pop.vcf))
prop.missing<-missing/length(pop.vcf)
return(prop.missing)
}))
if(length(missingness[missingness==1])>0){
print(paste("Row ", i, " is has no data for pop ", pop.list[which(missingness==1)]))
exclude<-c(exclude,i)
}
}
if(!is.null(exclude)){
return(vcf[-exclude,])
}else{
return(vcf)
}
}
gpop.name<-"poptree/p4.genepop"
sub.prefix<-"poptree/p4_"
vcf<-remove.missing.data(vcf, pop.list)
## [1] "Row 9 is has no data for pop LAFW"
## [1] "Row 17 is has no data for pop FLCC"
## [2] "Row 17 is has no data for pop FLLG"
## [1] "Row 26 is has no data for pop LAFW"
## [1] "Row 30 is has no data for pop LAFW"
## [1] "Row 46 is has no data for pop FLLG"
## [1] "Row 63 is has no data for pop LAFW"
## [1] "Row 68 is has no data for pop ALFW"
## [1] "Row 107 is has no data for pop FLLG"
## [1] "Row 118 is has no data for pop LAFW"
## [1] "Row 127 is has no data for pop FLCC"
## [2] "Row 127 is has no data for pop FLLG"
## [1] "Row 132 is has no data for pop LAFW"
## [1] "Row 175 is has no data for pop LAFW"
## [1] "Row 182 is has no data for pop LAFW"
## [1] "Row 214 is has no data for pop FLLG"
## [1] "Row 311 is has no data for pop LAFW"
## [2] "Row 311 is has no data for pop FLLG"
## [1] "Row 322 is has no data for pop LAFW"
## [1] "Row 334 is has no data for pop FLLG"
## [1] "Row 357 is has no data for pop FLLG"
## [1] "Row 394 is has no data for pop LAFW"
## [2] "Row 394 is has no data for pop FLLG"
## [1] "Row 414 is has no data for pop LAFW"
## [1] "Row 448 is has no data for pop FLLG"
## [1] "Row 484 is has no data for pop TXFW"
## [1] "Row 499 is has no data for pop LAFW"
## [1] "Row 520 is has no data for pop LAFW"
## [1] "Row 531 is has no data for pop TXFW"
## [1] "Row 543 is has no data for pop LAFW"
## [1] "Row 545 is has no data for pop LAFW"
## [1] "Row 551 is has no data for pop FLLG"
## [1] "Row 592 is has no data for pop FLLG"
## [1] "Row 599 is has no data for pop FLLG"
## [1] "Row 609 is has no data for pop FLLG"
## [1] "Row 616 is has no data for pop FLLG"
## [1] "Row 655 is has no data for pop LAFW"
## [1] "Row 694 is has no data for pop FLLG"
## [1] "Row 698 is has no data for pop TXFW"
## [1] "Row 709 is has no data for pop FLLG"
## [1] "Row 731 is has no data for pop LAFW"
## [1] "Row 758 is has no data for pop FLCC"
## [1] "Row 775 is has no data for pop FLLG"
## [1] "Row 777 is has no data for pop LAFW"
## [1] "Row 783 is has no data for pop FLLG"
## [1] "Row 810 is has no data for pop FLLG"
## [1] "Row 811 is has no data for pop LAFW"
## [2] "Row 811 is has no data for pop FLLG"
## [1] "Row 829 is has no data for pop LAFW"
## [1] "Row 832 is has no data for pop FLLG"
## [1] "Row 851 is has no data for pop FLLG"
## [1] "Row 854 is has no data for pop LAFW"
## [2] "Row 854 is has no data for pop FLLG"
## [1] "Row 865 is has no data for pop FLLG"
## [1] "Row 874 is has no data for pop LAFW"
## [1] "Row 881 is has no data for pop TXFW"
## [1] "Row 933 is has no data for pop LAFW"
## [2] "Row 933 is has no data for pop ALFW"
## [1] "Row 937 is has no data for pop FLLG"
## [1] "Row 953 is has no data for pop FLLG"
## [1] "Row 954 is has no data for pop LAFW"
## [1] "Row 1009 is has no data for pop FLLG"
## [1] "Row 1012 is has no data for pop FLLG"
## [1] "Row 1061 is has no data for pop FLLG"
## [1] "Row 1086 is has no data for pop FLLG"
## [1] "Row 1091 is has no data for pop FLLG"
## [1] "Row 1094 is has no data for pop FLLG"
## [1] "Row 1145 is has no data for pop LAFW"
## [1] "Row 1168 is has no data for pop LAFW"
## [1] "Row 1195 is has no data for pop LAFW"
## [1] "Row 1198 is has no data for pop LAFW"
## [1] "Row 1203 is has no data for pop FLLG"
## [1] "Row 1223 is has no data for pop LAFW"
## [2] "Row 1223 is has no data for pop FLLG"
## [1] "Row 1236 is has no data for pop FLLG"
## [1] "Row 1252 is has no data for pop LAFW"
## [1] "Row 1257 is has no data for pop FLLG"
## [1] "Row 1278 is has no data for pop FLLG"
## [1] "Row 1301 is has no data for pop LAFW"
## [1] "Row 1304 is has no data for pop LAFW"
## [1] "Row 1307 is has no data for pop FLLG"
## [1] "Row 1321 is has no data for pop LAFW"
## [2] "Row 1321 is has no data for pop FLLG"
## [1] "Row 1342 is has no data for pop LAFW"
## [1] "Row 1350 is has no data for pop FLLG"
## [1] "Row 1351 is has no data for pop LAFW"
## [2] "Row 1351 is has no data for pop FLLG"
## [1] "Row 1363 is has no data for pop LAFW"
## [1] "Row 1365 is has no data for pop LAFW"
## [1] "Row 1373 is has no data for pop LAFW"
## [1] "Row 1414 is has no data for pop LAFW"
## [1] "Row 1453 is has no data for pop LAFW"
## [1] "Row 1488 is has no data for pop LAFW"
## [1] "Row 1518 is has no data for pop LAFW"
## [1] "Row 1540 is has no data for pop FLLG"
## [1] "Row 1557 is has no data for pop FLLG"
## [1] "Row 1575 is has no data for pop FLLG"
## [1] "Row 1595 is has no data for pop FLLG"
## [1] "Row 1648 is has no data for pop TXFW"
## [2] "Row 1648 is has no data for pop FLLG"
## [1] "Row 1669 is has no data for pop FLLG"
## [1] "Row 1683 is has no data for pop LAFW"
## [1] "Row 1721 is has no data for pop LAFW"
## [1] "Row 1729 is has no data for pop ALFW"
## [1] "Row 1734 is has no data for pop FLLG"
## [1] "Row 1737 is has no data for pop FLLG"
## [1] "Row 1752 is has no data for pop LAFW"
## [1] "Row 1781 is has no data for pop LAFW"
## [1] "Row 1783 is has no data for pop LAFW"
## [1] "Row 1785 is has no data for pop FLLG"
## [1] "Row 1793 is has no data for pop LAFW"
## [1] "Row 1805 is has no data for pop LAFW"
## [1] "Row 1819 is has no data for pop LAFW"
## [2] "Row 1819 is has no data for pop FLLG"
## [1] "Row 1838 is has no data for pop FLLG"
## [1] "Row 1844 is has no data for pop TXFW"
## [1] "Row 1848 is has no data for pop TXFW"
## [1] "Row 1852 is has no data for pop TXFW"
## [1] "Row 1859 is has no data for pop FLLG"
## [1] "Row 1860 is has no data for pop FLLG"
## [1] "Row 1863 is has no data for pop FLLG"
## [1] "Row 1871 is has no data for pop LAFW"
## [1] "Row 1880 is has no data for pop FLCC"
## [2] "Row 1880 is has no data for pop FLLG"
## [1] "Row 1883 is has no data for pop FLLG"
## [1] "Row 1919 is has no data for pop TXFW"
## [1] "Row 1951 is has no data for pop FLLG"
## [1] "Row 1954 is has no data for pop FLLG"
## [1] "Row 1989 is has no data for pop FLLG"
## [1] "Row 1998 is has no data for pop TXFW"
## [2] "Row 1998 is has no data for pop TXCB"
## [1] "Row 2006 is has no data for pop FLLG"
## [1] "Row 2017 is has no data for pop FLLG"
## [1] "Row 2066 is has no data for pop FLLG"
## [1] "Row 2068 is has no data for pop FLLG"
## [1] "Row 2071 is has no data for pop LAFW"
## [1] "Row 2112 is has no data for pop FLLG"
## [1] "Row 2147 is has no data for pop FLLG"
## [1] "Row 2170 is has no data for pop LAFW"
## [1] "Row 2214 is has no data for pop FLLG"
## [1] "Row 2220 is has no data for pop FLLG"
## [1] "Row 2282 is has no data for pop FLLG"
## [1] "Row 2289 is has no data for pop FLLG"
## [1] "Row 2291 is has no data for pop LAFW"
## [1] "Row 2304 is has no data for pop FLLG"
## [1] "Row 2326 is has no data for pop FLLG"
## [1] "Row 2328 is has no data for pop LAFW"
## [1] "Row 2333 is has no data for pop FLLG"
## [1] "Row 2340 is has no data for pop LAFW"
## [1] "Row 2364 is has no data for pop FLLG"
## [1] "Row 2373 is has no data for pop FLCC"
## [2] "Row 2373 is has no data for pop FLLG"
## [1] "Row 2404 is has no data for pop FLLG"
## [1] "Row 2411 is has no data for pop FLLG"
## [1] "Row 2424 is has no data for pop FLLG"
## [1] "Row 2438 is has no data for pop FLLG"
## [1] "Row 2445 is has no data for pop FLLG"
## [1] "Row 2448 is has no data for pop FLLG"
## [1] "Row 2456 is has no data for pop FLLG"
## [1] "Row 2475 is has no data for pop FLLG"
## [1] "Row 2486 is has no data for pop FLLG"
## [1] "Row 2546 is has no data for pop FLLG"
## [1] "Row 2555 is has no data for pop FLLG"
## [1] "Row 2578 is has no data for pop LAFW"
## [2] "Row 2578 is has no data for pop FLLG"
## [1] "Row 2587 is has no data for pop FLLG"
## [1] "Row 2615 is has no data for pop LAFW"
## [1] "Row 2634 is has no data for pop FLLG"
## [1] "Row 2642 is has no data for pop LAFW"
## [1] "Row 2659 is has no data for pop FLLG"
## [1] "Row 2674 is has no data for pop FLLG"
## [1] "Row 2720 is has no data for pop LAFW"
## [1] "Row 2729 is has no data for pop LAFW"
## [2] "Row 2729 is has no data for pop FLLG"
## [1] "Row 2748 is has no data for pop FLLG"
## [1] "Row 2815 is has no data for pop TXFW"
## [1] "Row 2823 is has no data for pop FLLG"
## [1] "Row 2825 is has no data for pop FLLG"
## [1] "Row 2831 is has no data for pop FLLG"
## [1] "Row 2840 is has no data for pop FLLG"
## [1] "Row 2854 is has no data for pop LAFW"
## [1] "Row 2917 is has no data for pop LAFW"
## [1] "Row 2919 is has no data for pop LAFW"
## [1] "Row 2961 is has no data for pop LAFW"
## [1] "Row 2969 is has no data for pop FLPB"
## [2] "Row 2969 is has no data for pop FLHB"
## [3] "Row 2969 is has no data for pop FLCC"
## [4] "Row 2969 is has no data for pop FLLG"
## [1] "Row 2977 is has no data for pop LAFW"
## [1] "Row 2979 is has no data for pop FLLG"
## [1] "Row 2981 is has no data for pop LAFW"
## [1] "Row 2985 is has no data for pop LAFW"
## [1] "Row 2991 is has no data for pop FLLG"
## [1] "Row 2995 is has no data for pop LAFW"
## [1] "Row 3003 is has no data for pop LAFW"
## [1] "Row 3013 is has no data for pop LAFW"
## [1] "Row 3026 is has no data for pop LAFW"
## [2] "Row 3026 is has no data for pop FLLG"
## [1] "Row 3095 is has no data for pop LAFW"
## [1] "Row 3128 is has no data for pop FLLG"
## [1] "Row 3158 is has no data for pop FLCC"
## [2] "Row 3158 is has no data for pop FLLG"
## [1] "Row 3172 is has no data for pop FLLG"
## [1] "Row 3177 is has no data for pop LAFW"
## [1] "Row 3181 is has no data for pop FLLG"
## [1] "Row 3203 is has no data for pop LAFW"
## [1] "Row 3208 is has no data for pop LAFW"
## [2] "Row 3208 is has no data for pop FLLG"
## [1] "Row 3254 is has no data for pop LAFW"
## [1] "Row 3256 is has no data for pop LAFW"
## [1] "Row 3259 is has no data for pop FLLG"
## [1] "Row 3260 is has no data for pop FLLG"
## [1] "Row 3294 is has no data for pop FLLG"
## [1] "Row 3305 is has no data for pop LAFW"
## [1] "Row 3306 is has no data for pop LAFW"
## [1] "Row 3311 is has no data for pop TXFW"
## [1] "Row 3373 is has no data for pop LAFW"
## [1] "Row 3379 is has no data for pop LAFW"
## [1] "Row 3393 is has no data for pop FLLG"
## [1] "Row 3394 is has no data for pop FLLG"
## [1] "Row 3401 is has no data for pop FLLG"
## [1] "Row 3408 is has no data for pop LAFW"
## [1] "Row 3416 is has no data for pop LAFW"
## [1] "Row 3462 is has no data for pop LAFW"
## [1] "Row 3467 is has no data for pop FLLG"
## [1] "Row 3470 is has no data for pop FLLG"
## [1] "Row 3478 is has no data for pop FLLG"
## [1] "Row 3484 is has no data for pop FLLG"
## [1] "Row 3510 is has no data for pop LAFW"
## [1] "Row 3511 is has no data for pop FLLG"
## [1] "Row 3520 is has no data for pop FLLG"
## [1] "Row 3522 is has no data for pop LAFW"
## [1] "Row 3527 is has no data for pop LAFW"
## [1] "Row 3534 is has no data for pop FLLG"
## [1] "Row 3544 is has no data for pop LAFW"
## [1] "Row 3564 is has no data for pop LAFW"
## [2] "Row 3564 is has no data for pop FLLG"
## [1] "Row 3570 is has no data for pop FLLG"
## [1] "Row 3574 is has no data for pop FLLG"
## [1] "Row 3575 is has no data for pop FLLG"
## [1] "Row 3580 is has no data for pop FLLG"
## [1] "Row 3583 is has no data for pop LAFW"
## [1] "Row 3620 is has no data for pop FLLG"
## [1] "Row 3688 is has no data for pop FLLG"
## [1] "Row 3696 is has no data for pop FLLG"
## [1] "Row 3703 is has no data for pop FLLG"
## [1] "Row 3710 is has no data for pop LAFW"
## [1] "Row 3730 is has no data for pop TXSP"
## [2] "Row 3730 is has no data for pop TXCC"
## [3] "Row 3730 is has no data for pop TXFW"
## [4] "Row 3730 is has no data for pop TXCB"
## [1] "Row 3741 is has no data for pop LAFW"
## [1] "Row 3754 is has no data for pop LAFW"
## [1] "Row 3783 is has no data for pop FLLG"
## [1] "Row 3790 is has no data for pop FLLG"
## [1] "Row 3828 is has no data for pop FLLG"
## [1] "Row 3871 is has no data for pop LAFW"
## [1] "Row 3956 is has no data for pop LAFW"
## [1] "Row 3959 is has no data for pop FLLG"
## [1] "Row 3976 is has no data for pop FLLG"
## [1] "Row 4010 is has no data for pop LAFW"
## [1] "Row 4041 is has no data for pop LAFW"
## [1] "Row 4066 is has no data for pop LAFW"
## [2] "Row 4066 is has no data for pop FLLG"
## [1] "Row 4080 is has no data for pop FLLG"
## [1] "Row 4085 is has no data for pop FLLG"
## [1] "Row 4155 is has no data for pop TXFW"
## [1] "Row 4168 is has no data for pop FLLG"
## [1] "Row 4175 is has no data for pop FLLG"
## [1] "Row 4202 is has no data for pop FLLG"
## [1] "Row 4215 is has no data for pop FLLG"
## [1] "Row 4230 is has no data for pop LAFW"
## [2] "Row 4230 is has no data for pop FLLG"
## [1] "Row 4234 is has no data for pop FLLG"
## [1] "Row 4245 is has no data for pop FLLG"
## [1] "Row 4282 is has no data for pop FLLG"
## [1] "Row 4314 is has no data for pop LAFW"
## [1] "Row 4316 is has no data for pop LAFW"
## [1] "Row 4373 is has no data for pop LAFW"
## [1] "Row 4393 is has no data for pop LAFW"
## [1] "Row 4422 is has no data for pop FLLG"
## [1] "Row 4471 is has no data for pop FLLG"
## [1] "Row 4477 is has no data for pop FLLG"
## [1] "Row 4479 is has no data for pop LAFW"
## [1] "Row 4517 is has no data for pop LAFW"
## [1] "Row 4529 is has no data for pop LAFW"
## [1] "Row 4533 is has no data for pop FLLG"
## [1] "Row 4551 is has no data for pop FLLG"
## [1] "Row 4573 is has no data for pop LAFW"
## [1] "Row 4604 is has no data for pop FLLG"
## [1] "Row 4616 is has no data for pop LAFW"
## [1] "Row 4631 is has no data for pop LAFW"
## [1] "Row 4638 is has no data for pop LAFW"
## [1] "Row 4673 is has no data for pop LAFW"
## [1] "Row 4688 is has no data for pop LAFW"
## [1] "Row 4690 is has no data for pop TXFW"
## [1] "Row 4700 is has no data for pop FLLG"
## [1] "Row 4710 is has no data for pop LAFW"
## [1] "Row 4730 is has no data for pop FLCC"
## [2] "Row 4730 is has no data for pop FLLG"
## [1] "Row 4738 is has no data for pop FLLG"
## [1] "Row 4741 is has no data for pop FLLG"
## [1] "Row 4763 is has no data for pop LAFW"
## [2] "Row 4763 is has no data for pop FLLG"
## [1] "Row 4779 is has no data for pop LAFW"
## [2] "Row 4779 is has no data for pop FLLG"
## [1] "Row 4781 is has no data for pop LAFW"
## [1] "Row 4789 is has no data for pop FLLG"
## [1] "Row 4808 is has no data for pop FLCC"
## [2] "Row 4808 is has no data for pop FLLG"
## [1] "Row 4826 is has no data for pop FLLG"
## [1] "Row 4834 is has no data for pop LAFW"
## [1] "Row 4862 is has no data for pop LAFW"
## [1] "Row 4898 is has no data for pop FLLG"
## [1] "Row 4921 is has no data for pop ALFW"
## [1] "Row 4967 is has no data for pop FLLG"
## [1] "Row 4997 is has no data for pop FLLG"
## [1] "Row 5003 is has no data for pop FLLG"
## [1] "Row 5006 is has no data for pop FLLG"
## [1] "Row 5033 is has no data for pop FLLG"
## [1] "Row 5089 is has no data for pop ALFW"
## [1] "Row 5098 is has no data for pop FLLG"
## [1] "Row 5117 is has no data for pop FLLG"
## [1] "Row 5120 is has no data for pop LAFW"
## [1] "Row 5125 is has no data for pop FLLG"
## [1] "Row 5148 is has no data for pop LAFW"
## [1] "Row 5153 is has no data for pop LAFW"
## [1] "Row 5179 is has no data for pop LAFW"
## [2] "Row 5179 is has no data for pop FLLG"
## [1] "Row 5209 is has no data for pop FLLG"
## [1] "Row 5295 is has no data for pop FLLG"
## [1] "Row 5350 is has no data for pop LAFW"
## [1] "Row 5355 is has no data for pop FLLG"
## [1] "Row 5393 is has no data for pop LAFW"
## [1] "Row 5402 is has no data for pop FLLG"
## [1] "Row 5444 is has no data for pop LAFW"
## [2] "Row 5444 is has no data for pop FLLG"
## [1] "Row 5451 is has no data for pop LAFW"
## [1] "Row 5462 is has no data for pop LAFW"
## [1] "Row 5471 is has no data for pop FLLG"
## [1] "Row 5478 is has no data for pop FLLG"
## [1] "Row 5490 is has no data for pop LAFW"
## [2] "Row 5490 is has no data for pop FLLG"
## [1] "Row 5513 is has no data for pop LAFW"
## [1] "Row 5522 is has no data for pop FLLG"
## [1] "Row 5543 is has no data for pop LAFW"
## [1] "Row 5573 is has no data for pop ALFW"
## [1] "Row 5579 is has no data for pop LAFW"
## [1] "Row 5583 is has no data for pop LAFW"
## [1] "Row 5585 is has no data for pop ALFW"
## [1] "Row 5587 is has no data for pop FLHB"
## [2] "Row 5587 is has no data for pop FLCC"
## [3] "Row 5587 is has no data for pop FLLG"
## [1] "Row 5590 is has no data for pop TXSP"
## [2] "Row 5590 is has no data for pop TXCC"
## [3] "Row 5590 is has no data for pop TXFW"
## [4] "Row 5590 is has no data for pop TXCB"
## [5] "Row 5590 is has no data for pop LAFW"
## [6] "Row 5590 is has no data for pop ALFW"
## [1] "Row 5608 is has no data for pop TXFW"
## [1] "Row 5611 is has no data for pop LAFW"
## [1] "Row 5618 is has no data for pop LAFW"
## [1] "Row 5636 is has no data for pop LAFW"
## [1] "Row 5640 is has no data for pop LAFW"
## [2] "Row 5640 is has no data for pop FLLG"
## [1] "Row 5650 is has no data for pop FLLG"
## [1] "Row 5705 is has no data for pop LAFW"
## [1] "Row 5715 is has no data for pop FLLG"
## [1] "Row 5737 is has no data for pop LAFW"
## [1] "Row 5745 is has no data for pop LAFW"
## [1] "Row 5762 is has no data for pop FLLG"
## [1] "Row 5766 is has no data for pop LAFW"
## [1] "Row 5797 is has no data for pop LAFW"
## [2] "Row 5797 is has no data for pop FLLG"
## [1] "Row 5814 is has no data for pop FLLG"
## [1] "Row 5855 is has no data for pop FLLG"
## [1] "Row 5874 is has no data for pop LAFW"
## [1] "Row 5875 is has no data for pop LAFW"
## [1] "Row 5886 is has no data for pop LAFW"
## [1] "Row 5899 is has no data for pop FLLG"
## [1] "Row 5901 is has no data for pop FLCC"
## [2] "Row 5901 is has no data for pop FLLG"
## [1] "Row 5904 is has no data for pop LAFW"
## [1] "Row 5926 is has no data for pop LAFW"
## [1] "Row 5935 is has no data for pop LAFW"
## [2] "Row 5935 is has no data for pop FLLG"
## [1] "Row 5942 is has no data for pop LAFW"
## [1] "Row 5947 is has no data for pop LAFW"
## [1] "Row 6050 is has no data for pop LAFW"
## [2] "Row 6050 is has no data for pop FLLG"
## [1] "Row 6066 is has no data for pop LAFW"
## [1] "Row 6084 is has no data for pop LAFW"
## [1] "Row 6128 is has no data for pop TXFW"
## [1] "Row 6171 is has no data for pop FLHB"
## [1] "Row 6276 is has no data for pop LAFW"
## [1] "Row 6280 is has no data for pop FLLG"
## [1] "Row 6327 is has no data for pop FLLG"
## [1] "Row 6336 is has no data for pop FLLG"
## [1] "Row 6337 is has no data for pop LAFW"
## [1] "Row 6342 is has no data for pop LAFW"
## [1] "Row 6357 is has no data for pop FLLG"
## [1] "Row 6379 is has no data for pop FLLG"
## [1] "Row 6395 is has no data for pop FLLG"
## [1] "Row 6424 is has no data for pop FLLG"
## [1] "Row 6428 is has no data for pop FLLG"
## [1] "Row 6436 is has no data for pop LAFW"
## [1] "Row 6543 is has no data for pop LAFW"
## [2] "Row 6543 is has no data for pop FLLG"
## [1] "Row 6553 is has no data for pop LAFW"
## [1] "Row 6584 is has no data for pop LAFW"
## [1] "Row 6627 is has no data for pop LAFW"
## [1] "Row 6691 is has no data for pop LAFW"
## [1] "Row 6703 is has no data for pop LAFW"
## [2] "Row 6703 is has no data for pop FLLG"
## [1] "Row 6736 is has no data for pop LAFW"
## [1] "Row 6784 is has no data for pop LAFW"
## [1] "Row 6785 is has no data for pop FLLG"
## [1] "Row 6792 is has no data for pop FLLG"
## [1] "Row 6795 is has no data for pop FLLG"
## [1] "Row 6801 is has no data for pop LAFW"
## [1] "Row 6803 is has no data for pop LAFW"
## [1] "Row 6823 is has no data for pop FLLG"
## [1] "Row 6826 is has no data for pop LAFW"
## [1] "Row 6836 is has no data for pop FLCC"
## [2] "Row 6836 is has no data for pop FLLG"
## [1] "Row 6840 is has no data for pop FLLG"
## [1] "Row 6894 is has no data for pop LAFW"
## [1] "Row 6900 is has no data for pop FLLG"
## [1] "Row 6910 is has no data for pop FLLG"
## [1] "Row 6970 is has no data for pop LAFW"
## [1] "Row 6997 is has no data for pop LAFW"
## [1] "Row 7005 is has no data for pop FLLG"
## [1] "Row 7026 is has no data for pop FLPB"
## [2] "Row 7026 is has no data for pop FLHB"
## [3] "Row 7026 is has no data for pop FLCC"
## [4] "Row 7026 is has no data for pop FLLG"
## [1] "Row 7031 is has no data for pop FLLG"
## [1] "Row 7042 is has no data for pop LAFW"
## [1] "Row 7071 is has no data for pop TXFW"
## [2] "Row 7071 is has no data for pop ALFW"
## [1] "Row 7101 is has no data for pop LAFW"
## [1] "Row 7103 is has no data for pop LAFW"
## [1] "Row 7111 is has no data for pop LAFW"
## [1] "Row 7126 is has no data for pop FLLG"
## [1] "Row 7133 is has no data for pop LAFW"
## [1] "Row 7151 is has no data for pop FLLG"
## [1] "Row 7203 is has no data for pop FLLG"
## [1] "Row 7212 is has no data for pop FLLG"
## [1] "Row 7238 is has no data for pop LAFW"
## [1] "Row 7255 is has no data for pop FLLG"
## [1] "Row 7259 is has no data for pop FLLG"
## [1] "Row 7273 is has no data for pop FLLG"
## [1] "Row 7280 is has no data for pop FLLG"
## [1] "Row 7303 is has no data for pop FLLG"
## [1] "Row 7320 is has no data for pop LAFW"
## [1] "Row 7344 is has no data for pop FLLG"
## [1] "Row 7346 is has no data for pop LAFW"
## [1] "Row 7370 is has no data for pop FLLG"
## [1] "Row 7371 is has no data for pop FLLG"
## [1] "Row 7399 is has no data for pop LAFW"
## [1] "Row 7447 is has no data for pop FLLG"
## [1] "Row 7471 is has no data for pop LAFW"
## [1] "Row 7542 is has no data for pop FLLG"
## [1] "Row 7571 is has no data for pop FLLG"
## [1] "Row 7581 is has no data for pop LAFW"
## [1] "Row 7586 is has no data for pop FLLG"
## [1] "Row 7617 is has no data for pop FLLG"
## [1] "Row 7627 is has no data for pop FLLG"
## [1] "Row 7660 is has no data for pop FLLG"
## [1] "Row 7671 is has no data for pop LAFW"
## [1] "Row 7683 is has no data for pop LAFW"
## [1] "Row 7740 is has no data for pop FLLG"
## [1] "Row 7752 is has no data for pop FLLG"
## [1] "Row 7755 is has no data for pop FLLG"
## [1] "Row 7785 is has no data for pop LAFW"
## [1] "Row 7789 is has no data for pop FLLG"
## [1] "Row 7807 is has no data for pop LAFW"
## [1] "Row 7823 is has no data for pop LAFW"
## [1] "Row 7855 is has no data for pop FLLG"
## [1] "Row 7863 is has no data for pop FLLG"
## [1] "Row 7867 is has no data for pop FLLG"
## [1] "Row 7869 is has no data for pop LAFW"
## [1] "Row 7885 is has no data for pop LAFW"
## [2] "Row 7885 is has no data for pop FLLG"
## [1] "Row 7891 is has no data for pop FLLG"
## [1] "Row 7918 is has no data for pop FLLG"
## [1] "Row 7937 is has no data for pop TXCC"
## [2] "Row 7937 is has no data for pop TXFW"
## [3] "Row 7937 is has no data for pop TXCB"
## [4] "Row 7937 is has no data for pop LAFW"
## [5] "Row 7937 is has no data for pop ALFW"
## [1] "Row 7955 is has no data for pop FLLG"
## [1] "Row 7961 is has no data for pop TXFW"
## [1] "Row 7970 is has no data for pop FLLG"
## [1] "Row 7981 is has no data for pop LAFW"
## [1] "Row 8038 is has no data for pop FLLG"
## [1] "Row 8046 is has no data for pop FLLG"
## [1] "Row 8055 is has no data for pop FLLG"
## [1] "Row 8057 is has no data for pop LAFW"
## [1] "Row 8073 is has no data for pop FLLG"
## [1] "Row 8077 is has no data for pop FLLG"
## [1] "Row 8091 is has no data for pop FLLG"
## [1] "Row 8106 is has no data for pop LAFW"
## [1] "Row 8114 is has no data for pop FLLG"
## [1] "Row 8173 is has no data for pop FLLG"
## [1] "Row 8217 is has no data for pop FLLG"
## [1] "Row 8222 is has no data for pop LAFW"
## [1] "Row 8227 is has no data for pop LAFW"
## [1] "Row 8266 is has no data for pop LAFW"
## [2] "Row 8266 is has no data for pop FLLG"
## [1] "Row 8269 is has no data for pop FLLG"
## [1] "Row 8270 is has no data for pop FLLG"
## [1] "Row 8292 is has no data for pop TXFW"
## [2] "Row 8292 is has no data for pop ALFW"
## [1] "Row 8319 is has no data for pop FLLG"
## [1] "Row 8324 is has no data for pop LAFW"
## [1] "Row 8333 is has no data for pop FLLG"
## [1] "Row 8372 is has no data for pop FLLG"
## [1] "Row 8399 is has no data for pop FLLG"
## [1] "Row 8404 is has no data for pop LAFW"
## [1] "Row 8405 is has no data for pop FLLG"
## [1] "Row 8414 is has no data for pop FLLG"
## [1] "Row 8416 is has no data for pop FLHB"
## [2] "Row 8416 is has no data for pop FLCC"
## [3] "Row 8416 is has no data for pop FLLG"
## [1] "Row 8447 is has no data for pop FLLG"
## [1] "Row 8454 is has no data for pop FLLG"
## [1] "Row 8455 is has no data for pop LAFW"
## [1] "Row 8458 is has no data for pop FLLG"
## [1] "Row 8481 is has no data for pop FLLG"
## [1] "Row 8498 is has no data for pop FLLG"
## [1] "Row 8568 is has no data for pop LAFW"
## [2] "Row 8568 is has no data for pop FLLG"
## [1] "Row 8572 is has no data for pop LAFW"
## [1] "Row 8585 is has no data for pop LAFW"
## [1] "Row 8598 is has no data for pop LAFW"
## [1] "Row 8601 is has no data for pop LAFW"
## [1] "Row 8625 is has no data for pop FLLG"
## [1] "Row 8630 is has no data for pop LAFW"
## [1] "Row 8644 is has no data for pop FLLG"
## [1] "Row 8657 is has no data for pop FLLG"
## [1] "Row 8783 is has no data for pop FLLG"
## [1] "Row 8798 is has no data for pop LAFW"
## [1] "Row 8854 is has no data for pop LAFW"
## [1] "Row 8860 is has no data for pop LAFW"
## [1] "Row 8866 is has no data for pop FLLG"
## [1] "Row 8872 is has no data for pop LAFW"
## [2] "Row 8872 is has no data for pop FLLG"
## [1] "Row 8886 is has no data for pop FLLG"
## [1] "Row 8891 is has no data for pop LAFW"
## [1] "Row 8892 is has no data for pop LAFW"
## [2] "Row 8892 is has no data for pop FLLG"
## [1] "Row 8928 is has no data for pop FLLG"
## [1] "Row 8952 is has no data for pop FLLG"
## [1] "Row 8975 is has no data for pop FLLG"
## [1] "Row 8979 is has no data for pop LAFW"
## [1] "Row 9014 is has no data for pop LAFW"
## [1] "Row 9061 is has no data for pop LAFW"
## [1] "Row 9070 is has no data for pop FLLG"
## [1] "Row 9076 is has no data for pop LAFW"
## [2] "Row 9076 is has no data for pop ALFW"
## [1] "Row 9077 is has no data for pop TXFW"
## [1] "Row 9093 is has no data for pop LAFW"
## [1] "Row 9104 is has no data for pop LAFW"
## [2] "Row 9104 is has no data for pop FLLG"
## [1] "Row 9133 is has no data for pop LAFW"
## [1] "Row 9163 is has no data for pop FLLG"
## [1] "Row 9191 is has no data for pop FLLG"
## [1] "Row 9222 is has no data for pop FLLG"
## [1] "Row 9281 is has no data for pop ALFW"
## [1] "Row 9312 is has no data for pop FLLG"
## [1] "Row 9352 is has no data for pop LAFW"
## [1] "Row 9410 is has no data for pop FLLG"
## [1] "Row 9429 is has no data for pop LAFW"
## [2] "Row 9429 is has no data for pop FLLG"
## [1] "Row 9472 is has no data for pop ALFW"
## [1] "Row 9496 is has no data for pop LAFW"
## [1] "Row 9507 is has no data for pop LAFW"
## [1] "Row 9555 is has no data for pop FLLG"
## [1] "Row 9556 is has no data for pop FLLG"
## [1] "Row 9563 is has no data for pop FLLG"
## [1] "Row 9616 is has no data for pop TXCB"
## [1] "Row 9633 is has no data for pop LAFW"
## [2] "Row 9633 is has no data for pop FLLG"
for(i in 1:10){
rowsub<-sample(nrow(vcf),1000,replace = FALSE)
gpopsub<-vcf2gpop(vcf[rowsub,colnames(vcf)!="SNP"],pop.list,paste(sub.prefix,i,".genepop",sep=""))
}
gpop<-vcf2gpop(vcf[,colnames(vcf)!="SNP"],pop.list,gpop.name)
#then run poptree on all of them
And then run poptree. Poptree ran on the full dataset as well as the subsets of 1000 SNPs. Did they all provide similar results? What does the consensus tree look like?
poptree.dir<-"poptree/"
poptree.prefix<-"p4"
poptree.png<-"p4.poptree.png"
poptree.files<-list.files(path = poptree.dir,pattern=paste(poptree.prefix,".*.nwk",sep=""))
poptree.files<-lapply(poptree.files,function(x){ paste("poptree",x,sep="/")})
poptrees<-lapply(poptree.files,read.tree)
con.poptree<-consensus(poptrees)
con.poptree$tip.label[con.poptree$tip.label=="FLLG"]<-"FLFW"
clcolr <- rep("black", dim(con.poptree$edge)[1])
#clcolr[c(12,13,14,24)]<-all.colors[3]
#png(paste(poptree.dir,poptree.prefix,".consensus.png",sep=""),height=7,width=7,units="in",res=300)
#dev.off()
png(paste(poptree.dir,poptree.prefix,".png",sep=""),height=10,width=10,units="in",res=300)
par(mfrow=c(3,4),oma=c(1,1,1,1),mar=c(1,1,1,1))
for(i in 1:length(poptrees)){
plot.phylo(poptrees[[i]],cex=1.5)
mtext(poptree.files[i],3)
}
plot.phylo(con.poptree,tip.color = c(rep(grp.colors[6],4),grp.colors[5],
rep(grp.colors[1],4),rep(grp.colors[2],3),
rep(grp.colors[3],4)),
edge.color = clcolr,edge.width = 2,cex=1,font=1)
mtext("Consensus")
dev.off()
## png
## 2
All Trees
Based on this, I’m going to move forward just with the full dataset (which includes bootstrap values).
#just the full subset tree
pt.subtree<-poptrees[[1]]
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
pt.colors<-pt.subtree$tip.label
pt.colors[pt.colors %in% "FLFW"]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLAB")]<-grp.colors[5]
pt.colors[pt.colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
pt.colors[pt.colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
pt.colors[pt.colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]
clcolr <- rep("black", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
png(poptree.png,height=7,width=7,units="in",res=300)
plot.phylo(pt.subtree,tip.color = pt.colors,
edge.color = clcolr,edge.width = 2,label.offset = 0.0015)
dev.off()
## png
## 2
PopTree
To get the Poptree distance matrix, I copied and pasted the distance matrix in the p4.out file into a new file and saved it as p4.distance.out. I had to first save each part of the matrix as text files, open them in Excel to standardize the spacings, merge them, and then save them.
pt.dist<-as.matrix(read.table("poptree/p4.distance.out",header=T,row.names=1,sep='\t'))
jostpw<-as.matrix(read.table("Subset.JostsD.tsv",header=T,sep='\t'))
pwise.fst.raw<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')#p4_fst_summary.txt
pwise.fst<-matrix(nrow=length(pop.list),ncol=length(pop.list))
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.list
for(pop1 in 1:length(pop.list)){
for(pop2 in 1:length(pop.list)){
if(pop1 != pop2){
raws<-c(pwise.fst.raw[pop.list[pop1],pop.list[pop2]],pwise.fst.raw[pop.list[pop2],pop.list[pop1]])
pwise.fst[pop.list[pop1],pop.list[pop2]]<-raws[!is.na(raws)]
}
}
}
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.labs
pwise.fst[lower.tri(pwise.fst)]<-NA
heatmaps.name<-"p4.heatmap.png"
colors<-c("blue","yellow","red")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
cov<-read.table(gzfile(treemix.file), as.is = T, head = T, quote = "", comment.char = "")
#reorder
covplot <- data.frame(matrix(nrow = nrow(cov), ncol = ncol(cov)))
for(i in 1:length(pop.list)){
for( j in 1:length(pop.list)){
covplot[i, j] = cov[which(names(cov)==pop.list[i]), which(names(cov)==pop.list[j])]
rownames(covplot)[i]<-pop.list[i]
colnames(covplot)[j]<-pop.list[j]
}
}
cp<-as.matrix(covplot)
cp[lower.tri(cp)]<-NA
cp[upper.tri(cp)]<-covplot[upper.tri(covplot)]
colnames(cp)<-pop.labs
rownames(cp)<-pop.labs
cp.lv<-levelplot(cp,col.regions=cols,alpha.regions=0.7,
scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
dimnames(pt.dist)[[1]]<-dimnames(pt.dist)[[2]]<-pop.labs
dimnames(jostpw)[[1]]<-dimnames(jostpw)[[2]]<-pop.labs
colors<-c("black","darkgrey","grey","lightgrey","cornflowerblue")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
rev.colors<-c("cornflowerblue","lightgrey","grey","darkgrey","black")
rev.pal<-colorRampPalette(rev.colors)
rev.cols<-rev.pal(ncol)
hm.height<-list(x=3.8,units="in")#2.2
hm.width<-list(x=3.9,units="in")#2.4 in RStudio
png(heatmaps.name,height=11,width=11,units="in",res=300)
fst.lv<-levelplot(as.matrix(pwise.fst),col.regions=cols,alpha.regions=0.7,
scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(fst.lv,split=c(1,1,2,2),more=TRUE,panel.width=hm.width,
panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression(italic(F)[ST]), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()
cp.lv<-levelplot(cp,col.regions=rev.cols,alpha.regions=0.7,
scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(cp.lv,split=c(1,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("covariance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()
jost.lv<-levelplot(jostpw,col.regions=cols,alpha.regions=0.7,
scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(jost.lv,split=c(2,1,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression("Jost's"~italic(D)), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()
ptdist.lv<-levelplot(pt.dist,col.regions=cols,alpha.regions=0.7,
scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(ptdist.lv,split=c(2,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("PopTree2\nDistance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()
dev.off()
Figure 3. Heatmap
Figure 4 is the Poptree2 and Treemix trees next to each other
plotName<-"p4.trees.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"
pt.subtree<-read.tree("poptree/p4.nwk")
rect.start<-0.1
clcolr <- rep("grey27", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
pt.subtree$node.label<-round(as.numeric(pt.subtree$node.label)*100)
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
png(plotName,height=5,width=10,units="in",res=300)
par(mfrow=c(1,2),oma=c(1,0,1,0),mar=c(1,1,1,0.1))
plot.phylo(pt.subtree,tip.color = pt.colors,#align.tip.label = T,#show.node.label = TRUE,
edge.color = clcolr,edge.width = 2,label.offset = 0.0015,font=1)
nodelabels(pt.subtree$node.label,cex=0.75,font=2,frame="none",adj=c(1,-0.2))
mtext("PopTree2",3)
t3<-plot_tree(tm.tree,"treemix/poporder",plus=0.05,scale=F,mbar=F,arrow=0.1,
tip.order = tip.names,lwd = 2,branch.cols = branch.cols,xlab=F)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 1 1 FLSG NOT_ROOT NOT_MIG TIP 16 NA NA NA NA
## 2 2 <NA> NOT_ROOT NOT_MIG NOT_TIP 52 51 1 16 8
## 3 3 TXCC NOT_ROOT NOT_MIG TIP 172 NA NA NA NA
## 4 4 FLFD NOT_ROOT NOT_MIG TIP 256 NA NA NA NA
## 5 15 FLPB NOT_ROOT NOT_MIG TIP 473 NA NA NA NA
## 6 16 <NA> NOT_ROOT NOT_MIG NOT_TIP 2 1 1 304 7
## 7 31 FLLG NOT_ROOT NOT_MIG TIP 412 NA NA NA NA
## 8 32 <NA> NOT_ROOT NOT_MIG NOT_TIP 473 52 12 212 3
## 9 51 FLKB NOT_ROOT NOT_MIG TIP 2 NA NA NA NA
## 10 52 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 2 9 256 3
## 11 75 TXFW NOT_ROOT NOT_MIG TIP 104 NA NA NA NA
## 12 76 <NA> NOT_ROOT NOT_MIG NOT_TIP 304 472 2 303 1
## 13 103 ALFW NOT_ROOT NOT_MIG TIP 472 NA NA NA NA
## 14 104 <NA> NOT_ROOT NOT_MIG NOT_TIP 304 75 1 356 3
## 15 135 FLAB NOT_ROOT NOT_MIG TIP 588 NA NA NA NA
## 16 136 <NA> NOT_ROOT MIG NOT_TIP 32 52 NA NA NA
## 17 171 TXSP NOT_ROOT NOT_MIG TIP 172 NA NA NA NA
## 18 172 <NA> NOT_ROOT NOT_MIG NOT_TIP 356 3 1 171 1
## 19 211 FLHB NOT_ROOT NOT_MIG TIP 212 NA NA NA NA
## 20 212 <NA> NOT_ROOT NOT_MIG NOT_TIP 32 211 1 412 2
## 21 255 FLSI NOT_ROOT NOT_MIG TIP 588 NA NA NA NA
## 22 256 <NA> NOT_ROOT NOT_MIG NOT_TIP 52 4 1 588 2
## 23 303 ALST NOT_ROOT NOT_MIG TIP 76 NA NA NA NA
## 24 304 <NA> NOT_ROOT NOT_MIG NOT_TIP 16 104 4 76 3
## 25 355 TXCB NOT_ROOT NOT_MIG TIP 356 NA NA NA NA
## 26 356 <NA> NOT_ROOT NOT_MIG NOT_TIP 104 172 2 355 1
## 27 411 FLCC NOT_ROOT NOT_MIG TIP 412 NA NA NA NA
## 28 412 <NA> NOT_ROOT NOT_MIG NOT_TIP 212 411 1 31 1
## 29 471 LAFW NOT_ROOT NOT_MIG TIP 472 NA NA NA NA
## 30 472 <NA> NOT_ROOT NOT_MIG NOT_TIP 76 471 1 103 1
## 31 473 <NA> ROOT NOT_MIG NOT_TIP 473 15 1 32 15
## 32 490 <NA> NOT_ROOT MIG NOT_TIP 104 75 NA NA NA
## 33 546 <NA> NOT_ROOT MIG NOT_TIP 412 31 NA NA NA
## 34 588 <NA> NOT_ROOT NOT_MIG NOT_TIP 256 255 1 135 1
## 35 641 <NA> NOT_ROOT MIG NOT_TIP 104 75 NA NA NA
## V11
## 1 FLSG:0.000248367
## 2 (FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358
## 3 TXCC:0.000281411
## 4 FLFD:0.000926406
## 5 FLPB:0
## 6 (FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825
## 7 FLLG:0.0510187
## 8 (((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0
## 9 FLKB:0.000474655
## 10 ((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382
## 11 TXFW:0.0269586
## 12 ((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786
## 13 ALFW:0.00569026
## 14 (TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066
## 15 FLAB:0.0146583
## 16 <NA>
## 17 TXSP:0.00121161
## 18 (TXCC:0.000281411,TXSP:0.00121161):0.00205677
## 19 FLHB:0.000682617
## 20 (FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155
## 21 FLSI:8.2722e-05
## 22 (FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607
## 23 ALST:0
## 24 ((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792
## 25 TXCB:0.00319305
## 26 ((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619
## 27 FLCC:0.00110869
## 28 (FLCC:0.00110869,FLLG:0.0510187):0.000504332
## 29 LAFW:0.0118029
## 30 (LAFW:0.0118029,ALFW:0.00569026):0.0176978
## 31 (FLPB:0,(((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0);
## 32 <NA>
## 33 <NA>
## 34 (FLSI:8.2722e-05,FLAB:0.0146583):0.00107509
## 35 <NA>
## x y ymin ymax
## 1 0.022525012 0.84375 0.8125 0.8750
## 2 0.021611820 0.87500 0.3750 0.9375
## 3 0.046415117 0.71875 0.6875 0.7500
## 4 0.022500716 0.34375 0.3125 0.3750
## 5 0.000000000 0.96875 0.9375 1.0000
## 6 0.022276645 0.81250 0.3750 0.8750
## 7 0.052594582 0.03125 0.0000 0.0625
## 8 0.000000000 0.18750 0.0000 0.9375
## 9 0.022086475 0.90625 0.8750 0.9375
## 10 0.020338240 0.37500 0.1875 0.9375
## 11 0.067069276 0.78125 0.7500 0.8125
## 12 0.037667946 0.43750 0.3750 0.5625
## 13 0.053147151 0.46875 0.4375 0.5000
## 14 0.040110746 0.75000 0.5625 0.8125
## 15 0.029966867 0.21875 0.1875 0.2500
## 16 0.011480800 NA 0.0000 0.1875
## 17 0.047345316 0.65625 0.6250 0.6875
## 18 0.046133706 0.68750 0.6250 0.7500
## 19 0.001754167 0.15625 0.1250 0.1875
## 20 0.001071550 0.12500 0.0000 0.1875
## 21 0.022732122 0.28125 0.2500 0.3125
## 22 0.021574310 0.31250 0.1875 0.3750
## 23 0.037667946 0.40625 0.3750 0.4375
## 24 0.032640086 0.56250 0.3750 0.8125
## 25 0.046647245 0.59375 0.5625 0.6250
## 26 0.044076936 0.62500 0.5625 0.7500
## 27 0.002684572 0.09375 0.0625 0.1250
## 28 0.001575882 0.06250 0.0000 0.1250
## 29 0.059259791 0.53125 0.5000 0.5625
## 30 0.047456891 0.50000 0.4375 0.5625
## 31 0.000000000 0.93750 0.0000 1.0000
## 32 0.064357446 NA 0.5625 0.7500
## 33 0.022581282 NA 0.0000 0.0625
## 34 0.022649400 0.25000 0.1875 0.3125
## 35 0.067069276 NA 0.5625 0.7500
## [1] "0.564492 0 0.02033824"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
## [1] "0.41172 0.001575882 0.052594582"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
## [1] 0.022525012 0.046415117 0.022500716 0.000000000 0.052594582
## [6] 0.022086475 0.067069276 0.053147151 0.029966867 0.047345316
## [11] 0.001754167 0.022732122 0.037667946 0.046647245 0.002684572
## [16] 0.059259791
## [1] 0.003
mtext("Treemix",3)
ybar<-0.01
mcols = rev( heat.colors(150) )
mcols = mcols[50:length(mcols)]
ymi = ybar+rect.start
yma = ybar+0.3
l = 0.2
w = l/100
xma = max(t3$d$x/20)
rect( rep(rect.start, 100), ymi+(0:99)*w, rep(rect.start+xma, 100), ymi+(1:100)*w, col = mcols, border = mcols)
text(rect.start+xma+0.001, ymi, lab = "0", adj = 0)
text(rect.start+xma+0.001, yma, lab = "0.5", adj = 0)
text(rect.start, yma+0.07, lab = "Migration", adj = 0 )
text(rect.start, yma+0.04, lab = "weight", adj = 0 )
dev.off()
## png
## 2
Figure 4. Trees
I need to re-run populations because I don’t seem to have the correct fst files. I can use the whitelist and hopefully include bootstrapping and kernel smoothing:
populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --fstats --vcf_haplotypes --genomic --bootstrap -k
And then I will identify the shared outliers among them. But first, let’s define some names.
vcf<-parse.vcf("p4.upd.vcf")
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")
dd.plot.name<-as.character("separate_delta-divergence.png")
dd.name<-as.character("sep.deltadivergence.txt")
sdd.name<-as.character("sep.smoothedDD.out.txt")
afs.plot.name<-as.character("p4.All_AFS.png")
stacks.sig.out<-"p4.stacks.sig.snps.txt"
annotations.name<-"p4.StacksFWSWOutliers_annotatedByGenome.csv"
bf.file<-"bayenv/p4.bf.txt"
fwsw.tx<-read.delim("stacks/p4.fst_TXCC-TXFW.txt")
fwsw.la<-read.delim("stacks/p4.fst_ALST-LAFW.txt")
fwsw.al<-read.delim("stacks/p4.fst_ALFW-ALST.txt")
fwsw.fl<-read.delim("stacks/p4.fst_FLCC-FLLG.txt")
fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.
tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
## [1] 962
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
## [1] 579
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
## [1] 679
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
## LG1 LG10 LG12 LG13 LG14
## 5 1 5 1 6
## LG18 LG19 LG2 LG20 LG3
## 2 1 4 3 1
## LG4 LG6 LG9 scaffold_1679 scaffold_246
## 14 3 1 1 1
## scaffold_409 scaffold_665 scaffold_727 scaffold_856
## 1 1 1 1
#are they using the same SNPs or different SNPs?
stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
for(i in 1:nrow(fw.shared.chr)){
tx.bp<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01 & fwsw.tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
la.bp<-fwsw.la[fwsw.la$Fisher.s.P<0.01 & fwsw.la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
al.bp<-fwsw.al[fwsw.al$Fisher.s.P<0.01 & fwsw.al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
fl.bp<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01 & fwsw.fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
}
colnames(stacks.sig)<-c("TX","LA","AL","FL")
stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
write.table(stacks.sig,stacks.sig.out,sep='\t',row.names=FALSE,col.names=TRUE)
#+ annotations, eval=FALSE
gff<-read.delim(gzfile("../../scovelli_genome/ssc_2016_12_20_chromlevel.gff.gz"),header=F)
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
genome.blast<-read.csv("../../scovelli_genome/ssc_2016_12_20_cds_nr_blast_results.csv",skip=1,header=T)#I saved it as a csv
#' extract the significant regions from the gff file
fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
if(nrow(this.reg) == 0){
if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region="beyond.last.contig", description=NA,SSCID=NA)
}else{
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=NA,description=NA,SSCID=NA)
}
}else{
if(length(grep("SSCG\\d+",this.reg$attribute))>0){
geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
}else{
geneID<-NA
gene<-NA
}
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
}
return(as.data.frame(new))
}))
## Warning in data.frame(Locus = sig["Locus.ID"], Chr = sig["Chr"], BP =
## sig["BP"], : row names were found from a short variable and have been
## discarded
write.csv(fw.sig.reg,annotations.name)
fw.sig.reg<-read.csv(annotations.name)
There are 53 shared SNPs using the Fisher’s P as a cutoff.
The bayes factors and Fsts are all in the following chunk.
fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.
tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
## [1] 962
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
## [1] 579
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
## [1] 679
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
bf<-read.delim(bf.file)
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,9,6)]
stacks.sig<-read.delim(stacks.sig.out)
Figure 5 includes Fst data, Bayes Factors data, colors from Structure, and reciprocal monophyly data, plus smoothed Fst values (which I think I will leave out this round).
Now I’m ready to plot Figure 5.
fig5.name<-"p4_stacks_fsts_fwsw_bf.png"
addLines<-FALSE
addSmooth<-TRUE
#' Set up the plotting utilities
all.chr<-data.frame(Chr=c(as.character(fwsw.tx$Chr),as.character(fwsw.la$Chr),
as.character(fwsw.al$Chr),as.character(fwsw.fl$Chr),
as.character(bf$scaffold)),
BP=c(fwsw.tx$BP,fwsw.la$BP,fwsw.al$BP,fwsw.fl$BP,bf$BP),stringsAsFactors = F)
bounds<-tapply(as.numeric(as.character(all.chr$BP)), all.chr$Chr,max)
bounds<-data.frame(Chrom=dimnames(bounds),End=bounds)
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chr]
plot.scaffs[1:22]<-lgs
bounds<-bounds[match(plot.scaffs,bounds$Chrom),]
#generate info
fwsw.plot<-assign.plotpos(fwsw,plot.scaffs,bounds,df.bp="BP",df.chrom = "Chr")
addLines<-TRUE
addLines<-FALSE
addSmooth<-TRUE
#' plot with the outlier regions
#' Does NOT include monophyletic neighborjoining trees.
png(fig5.name,height=6,width=8,units="in",res=300)
par(mfrow=c(5,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
#par(fig=c(0,1,0.9-0.9/5,0.9))
fwswt.fst<-fst.plot(fwsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
pt.cols = c(grp.colors[1],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswt.fst$plot.pos,fwswt.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswt.fst$BP,fwswt.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[1])
#points(fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],fwswt.fst$Corrected.AMOVA.Fst[fwswt.fst$Locus.ID %in% all.shared],
# pch=1,col="black",cex=1.3)
clip(0,max(fwswt.fst$plot.pos),0,1)
abline(v=fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],col="gray47")
mtext("TXFW vs. TXCC",2,cex=0.75)#,line=-1)
labs<-tapply(fwswt.fst$plot.pos,fwswt.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
fwswl.fst<-fst.plot(fwsw.la,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswl.fst$plot.pos,fwswl.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswl.fst$BP,fwswl.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
#points(fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],fwswl.fst$Corrected.AMOVA.Fst[fwswl.fst$Locus.ID %in% all.shared],
# pch=1,col="black",cex=1.3)
clip(0,max(fwswl.fst$plot.pos),0,1)
abline(v=fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],col="gray47")
mtext("LAFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswl.fst$plot.pos,fwswl.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
fwswa.fst<-fst.plot(fwsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
pt.cols=c(grp.colors[3],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswa.fst$plot.pos,fwswa.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswa.fst$BP,fwswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[4])
#points(fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],fwswa.fst$Corrected.AMOVA.Fst[fwswa.fst$Locus.ID %in% all.shared],
# pch=1,col="black",cex=1.3)
clip(0,max(fwswa.fst$plot.pos),0,1)
abline(v=fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],col="gray47")
mtext("ALFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswa.fst$plot.pos,fwswa.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
fwswf.fst<-fst.plot(fwsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswf.fst$plot.pos,fwswf.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswf.fst$BP,fwswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
#points(fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],fwswf.fst$Corrected.AMOVA.Fst[fwswf.fst$Locus.ID %in% all.shared],
# pch=1,col="black",cex=1.3)
clip(0,max(fwswf.fst$plot.pos),0,1)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("FLFW vs. FLCC",2,cex=0.75)#,line=-1)
mtext(expression(bold(italic(F)[ST])),2,outer=T,line=-1,cex=0.75)
labs<-tapply(fwswf.fst$plot.pos,fwswf.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
#BF
bs.sal<-fst.plot(bf,fst.name="logSal",chrom.name="scaffold",bp.name = "BP",scaffold.widths=bounds,
scaffs.to.plot = plot.scaffs,pch=19,axis.size = 1,pt.cex = 1)
points(bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"plot.pos"],
bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"logSal"],
col="cornflowerblue",pch=19)#give the outliers a color
clip(0,max(bs.sal$plot.pos),-3,241)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("log(Salinity BF)",2,cex=0.75,line=2.1)
#points(bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"plot.pos"],
# bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"logSal"],
# col="cornflowerblue",cex=1.3)
#clip(min(bs.sal$plot.pos),max(bs.sal$plot.pos),
# min(bs.sal$logSal),max(bs.sal$logSal))
#abline(h=log(bf.co["Salinity_BF"]),col="cornflowerblue",lwd=2)
#add chromosome labels
labs<-tapply(bs.sal$plot.pos,bs.sal$scaffold,median)
text(x=labs[lgs],y=-25,labels=lgn,xpd=TRUE)
dev.off()
## png
## 2
Figure 5. Fsts
There seems to be an overabundance of shared outliers on LG4. I can test this using the multinomial distribution.
if(!("stacks.sig" %in% ls())){
stacks.sig<-read.delim("p4.stacks.sig.snps.txt")
}
if(is.null(stacks.sig$SNP)){ #make sure the SNP column is there.
stacks.sig$SNP<-paste(stacks.sig$Chr,as.numeric(stacks.sig$BP)+1,sep=".")
}
sig.chrom<-stacks.sig[stacks.sig$Chr %in% lgs,]
sig.chrom$Chr<-factor(sig.chrom$Chr)
nloci<-nrow(sig.chrom)
nchrom<-length(lgs)
exp.perchrom<-rep(nloci/length(lgs),length(lgs))
names(exp.perchrom)<-lgs
exp.perchrom
## LG1 LG2 LG3 LG4 LG5 LG6 LG7 LG8
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364
## LG9 LG10 LG11 LG12 LG13 LG14 LG15 LG16
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364
## LG17 LG18 LG19 LG20 LG21 LG22
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364
obs.perchrom<-tapply(sig.chrom$Locus.ID,sig.chrom$Chr,length)[lgs]
names(obs.perchrom)<-lgs
obs.perchrom[is.na(obs.perchrom)]<-0
obs.perchrom
## LG1 LG2 LG3 LG4 LG5 LG6 LG7 LG8 LG9 LG10 LG11 LG12 LG13 LG14 LG15
## 5 4 1 14 0 3 0 0 1 1 0 5 1 6 0
## LG16 LG17 LG18 LG19 LG20 LG21 LG22
## 0 0 2 1 3 0 0
dmultinom(x = obs.perchrom,prob = exp.perchrom)
## [1] 1.333996e-25
I can also look for overlapping loci in pairwise saltwater-saltwater comparisons.
swsw.name<-"p4_stacks_swsw.png"
swsw.tx<-read.delim("stacks/p4.fst_TXCB-TXCC.txt")
swsw.al<-read.delim("stacks/p4.fst_ALST-FLSG.txt")
swsw.fl<-read.delim("stacks/p4.fst_FLCC-FLHB.txt")
###### SW-SW neighbors ######
tx.sw.sig<-swsw.tx[swsw.tx$Fisher.s.P<0.01,"Locus.ID"]
al.sw.sig<-swsw.al[swsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sw.sig<-swsw.fl[swsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sw.sig[(tx.sw.sig %in% c(al.sw.sig,fl.sw.sig))])
## [1] 106
length(al.sw.sig[(al.sw.sig %in% c(tx.sw.sig,fl.sw.sig))])
## [1] 135
length(fl.sw.sig[(fl.sw.sig %in% c(tx.sw.sig,al.sw.sig))])
## [1] 35
sw.shared<-fl.sw.sig[fl.sw.sig %in% al.sw.sig & fl.sw.sig %in% tx.sw.sig]
png(swsw.name,height=6,width=7.5,units="in",res=300)
par(mfrow=c(3,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
swswt.fst<-fst.plot(swsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
pt.cols=c(grp.colors[1],grp.colors[2]),pt.cex = 1,pch=19)
mtext("TXCC vs. TXCB",3,line=-1,cex=0.75)
swswa.fst<-fst.plot(swsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", y.lim=c(0,1),
scaffs.to.plot=plot.scaffs, scaffold.widths = bounds,axis.size = 1,
pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex = 1,pch=19)
#points(swswa.fst$plot.pos,swswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
mtext("ALST vs. FLSG",3,line=-1,cex=0.75)
swswf.fst<-fst.plot(swsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr",
scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex = 1,pch=19)
#points(swswf.fst$plot.pos,swswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
mtext("FLCC vs. FLHB",3,line=-1,cex=0.75)
mtext(expression(italic(F)[ST]),2,outer=T,line=-0.75,cex=0.75)
last<-0
for(i in 1:length(lgs)){
text(x=mean(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"]),y=-0.07,
labels=lgn[i], adj=1, xpd=TRUE)
last<-max(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"])
}
dev.off()
## png
## 2
SWSW Fsts
In Figure 6, we show different measures of diversity: pi (nucleotide diversity), heterozygosity, Jost’s D, and delta-divergence. Additionally, we include the shared Fst outliers and putative loci, and highlight regions with high pi and (H).
#' Calculate AFS from vcf
fw.afs<-lapply(fw.list,function(pop){
this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(fw.afs)<-c("TXFW","LAFW","ALFW","FLFW")
sw.afs<-lapply(sw.list,function(pop){
this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(sw.afs)<-sw.list
all.afs<-c(fw.afs,sw.afs)
Figure S1. Fsts
pi.file.name<-"p4.pi.txt"
avgpi.file.name<-"p4.avgpi.txt"
all.pi<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Pi=unlist(apply(vcf,1,calc.pi)))
all.pi$SNP<-paste(all.pi$Chrom,as.numeric(as.character(all.pi$Pos)),sep=".")
write.table(all.pi,pi.file.name,col.names = TRUE,row.names=FALSE, quote=FALSE,sep='\t')
all.pi<-read.table(pi.file.name,header=T)
avg.pi.adj<-read.table(avgpi.file.name,header=T)
all.het.name<-"p4.avg.het.txt"
avg.het.adj.name<-"p4.avg.het.adj.txt"
#het
avg.het<-do.call("rbind",sliding.window(vcf,lgs,stat="het",width=10))
avg.het.adj<-fst.plot(avg.het,scaffold.widths=scaff.starts,pch=19,
fst.name = "Avg.Stat",chrom.name = "Chr",bp.name = "Avg.Pos")
all.het<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Het=unlist(apply(vcf,1,calc.het)))
all.het$SNP<-paste(all.het$Chrom,as.numeric(as.character(all.het$Pos)),sep=".")
write.table(avg.het.adj,avg.het.adj.name,sep='\t',col.names=TRUE)
write.table(all.het,all.het.name,sep='\t',col.names=TRUE)
avg.het.adj<-read.delim(avg.het.adj.name,header=TRUE)
all.het<-read.delim(all.het.name,header=TRUE)
swfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = sw.list,pop.list2 = fw.list,maf.cutoff=0.01)
fwfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = fw.list,pop.list2 = fw.list, maf.cutoff=0.01)
deltad<-merge(swfw.mu,fwfw.mu,by="SNP")
deltad<-deltad[,c("SNP","Chrom.x","Pos.x","Mean.Fst.x","Mean.Fst.y")]
colnames(deltad)<-c("SNP","Chrom","Pos","MeanSWFW.Fst","MeanFWFW.Fst")
deltad$deltad<-deltad$MeanSWFW.Fst - deltad$MeanFWFW.Fst
deltad<-deltad[!is.na(deltad$deltad),]#remove NAs
write.table(deltad, dd.name,sep='\t',col.names = TRUE,row.names = FALSE)
#' ```
This plotting function also generates the smoothed \(\delta\)-divergence values (which are not saved) and identifies the outliers (in 99% and 1% quantiles, and which are saved in sdd.out).
First, I must calculate Jost’s D on each locus in the p4 dataset
sub.genind<-read.structure("stacks/subset.structure.stru",n.ind=698,
n.loc=9638,col.lab=1,col.pop=2,sep='\t',
row.marknames = 2,onerowperind=FALSE,ask=FALSE)
sub.genind@pop<-factor(gsub("sample_(\\w{4}).*","\\1",rownames(sub.genind@tab)))
jostd<-D_Jost(sub.genind)
write.table(jostd$per.locus,"p4.jostd.perlocus.txt",sep='\t',col.names=FALSE,row.names = TRUE,quote=F)
Now this is done, so I don’t need to evaluate it again – I’ll just need to read it in from a file when I want to use it.
jostd<-read.delim(jostd.name,header=F)
colnames(jostd)<-c("locid","D")
jostd$SNP<-vcf$SNP
jostd$POS<-vcf$POS
jostd$Chr<-vcf$`#CHROM`
jostd$ID<-vcf$ID
smoothed.name<-"p4_deltad_pi_het.png"
#assign the plotting positions relative to all loci
stacks.sig<-assign.plotpos(stacks.sig,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="BP")
dd<-assign.plotpos(deltad,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos")
pi<-assign.plotpos(all.pi,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos")
ht<-assign.plotpos(all.het,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos")
jd<-assign.plotpos(jostd,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="POS")
colnames(jd)<-c("locid","D","SNP","Pos","Chrom","ID","plot.pos") #standardize the names
smooth.per.chrom<-function(df,lgs,df.chrom,df.bp,df.stat,out.name,...){#span=0.1,degree=2
smooth.all<-data.frame(stringsAsFactors = FALSE) #all smoothed values
smooth.low<-data.frame(stringsAsFactors = FALSE) #lower outliers
smooth.upp<-data.frame(stringsAsFactors = FALSE) #upper outliers
for(i in 1:length(lgs)){
this.chrom<-df[df[,df.chrom] %in% lgs[i],]
this.smooth<-loess.smooth(this.chrom[,df.bp],this.chrom[,df.stat],...)
this.smooth<-data.frame(chr=cbind(rep(as.character(lgs[i]),length(this.smooth$x)),
pos=as.numeric(this.smooth$x),
smoothed.stats=as.numeric(this.smooth$y)),stringsAsFactors = FALSE)
this.upp<-this.chrom[this.chrom[df.stat]>=quantile(this.chrom[,df.stat],0.99),]#choosing the upper outliers
this.low<-this.chrom[this.chrom[,df.stat]<=quantile(this.chrom[,df.stat],0.01),]#choosing the lower outliers
# save the data
smooth.all<-data.frame(rbind(smooth.all,this.smooth))
smooth.low<-rbind(smooth.low,this.low)
smooth.upp<-rbind(smooth.upp,this.upp)
}
smooth.upp$direction<-"upper"
smooth.low$direction<-"lower"
smooth.out<-rbind(smooth.low,smooth.upp) # put the outliers in one df
colnames(smooth.all)<-c("chr","pos","smoothed.stats")
smooth.all$pos<-as.numeric(as.character(smooth.all$pos))
smooth.all$smoothed.stats<-as.numeric(as.character(smooth.all$smoothed.stats))
# save to files
write.table(smooth.out,paste(out.name,"outliers.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
write.table(smooth.all,paste(out.name,"smoothed.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
smoothed<-list(smooth.out,smooth.all)
}
#smooth the statistics
dd.bp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="deltad",out.name="dd.bp",span=.1,degree=2)
pi.bp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Pi",out.name="pi.bp",span=0.1,degree=2)
ht.bp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Het",out.name="ht.bp",span=0.1,degree=2)
jd.bp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="D",out.name="jd.bp",span=0.1,degree=2)
#smooth the statistics
dd.pp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="deltad",out.name="dd",span=0.1,degree=2)
pi.pp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Pi",out.name="pi",span=0.1,degree=2)
ht.pp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Het",out.name="ht",span=0.1,degree=2)
jd.pp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="D",out.name="jd",span=0.1,degree=2)
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
plot.smooth.stats<-function(df,smooth,stat.name,color,ylabel,...){
dd<-fst.plot(fst.dat = df,...)
mtext(ylabel,2,line=1.5,cex=0.75)
points(x=as.numeric(as.character(smooth[[2]]$pos)),
y=as.numeric(as.character(smooth[[2]]$smoothed.stats)),col=color,type="l",lwd=2)
points(x=as.numeric(smooth[[2]]$pos),y=as.numeric(smooth[[2]]$smoothed.stats),col=color,type="l",lwd=2)
points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="upper"],smooth[[1]][smooth[[1]]$direction=="upper",stat.name],
pch=24,bg=color,col=color,cex=0.5) #add outliers
points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="lower"],smooth[[1]][smooth[[1]]$direction=="lower",stat.name],
pch=25,bg=color,col=color,cex=0.5)
}
xlab.lgs<-function(df,lgs,lgn,chr,bp,...){
last<-0
for(i in 1:length(lgs)){
text(x=median(df[df[,chr] ==lgs[i],bp]),labels=lgn[i],...)
last<-max(df[df[,chr] ==lgs[i],bp])
}
}
png(smoothed.name,height=7,width=5,units="in",res=300)
par(mfrow=c(4,1),mar=c(1,1,1,1),oma=c(1,2,1,1),cex=0.75)
t<-plot.smooth.stats(df=dd,smooth=dd.pp.smooth,stat.name="deltad",
color=comp.col["deltad"],fst.name="deltad",bp.name="Pos",
ylabel=expression(paste(delta,"-divergence")),y.lim=c(-0.5,1),
axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(dd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.5,cex=0.75)
#plot Jost's D
d<-plot.smooth.stats(df=jd,smooth=jd.pp.smooth,stat.name="D",
color=comp.col["D"],fst.name="D",chrom.name = "Chrom",bp.name="Pos",
ylabel=expression("Jost's"~italic(D)),
axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(jd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.075,cex=0.75)
#plot pi
p<-plot.smooth.stats(df=pi,smooth=pi.pp.smooth,stat.name="Pi",
color=comp.col["pi"],fst.name="Pi",chrom.name = "Chrom",bp.name="Pos",
ylabel=expression(pi),y.lim=c(0,0.5),
axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(pi,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)
#abline(v=stacks.sig$plot.pos,col="grey10") #stacks
#plot heterozygosity
h<-plot.smooth.stats(df=ht,smooth=ht.pp.smooth,stat.name="Het",
color=comp.col["Het"],fst.name="Het",chrom.name = "Chrom",bp.name="Pos",
ylabel=expression(italic(H)),
axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(ht,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)
dev.off()
## png
## 2
Figure x. Smoothed variables
bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
all.out<-data.frame(rbind(cbind(paste(fw.sig.reg$Chr,fw.sig.reg$BP,sep="."),"fst"),
cbind(dd.bp.smooth[[1]]$SNP,"deltad"),
cbind(ht.bp.smooth[[1]]$SNP,"het"),
cbind(jd.bp.smooth[[1]]$SNP,"jostd"),
cbind(pi.bp.smooth[[1]]$SNP,"pi"),
cbind(sal.bf.sig$SNP,"bayenv_salinity")),stringsAsFactors = FALSE)
colnames(all.out)<-c("SNP","test")
write.table(all.out,"all_outliers.txt",sep='\t',quote=FALSE,col.names=TRUE,row.names=FALSE)
Here, I compare the Fst outliers and \(\delta\)-divergence outliers to putative genes identified from other studies of teleosts adapting to freshwater environments.
#putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
gff.name<-"ssc_2016_12_20_chromlevel.gff.gz"
if(length(grep("gz",gff.name))>0){
gff<-read.delim(gzfile(paste("../../scovelli_genome/",gff.name,sep="")),header=F)
} else{
gff<-read.delim(paste("../../scovelli_genome/",gff.name,sep=""),header=F)
}
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
#ld info
ld<-read.delim("p4.ld_info_fwsw.txt")
These are updated from fwsw_analysis.R and are specific to these analyses, and are not generalizable!
ssc.fwsw.fstsig<-function(tx, la, al, fl, cutoff)
{
tx.sig<-tx[tx$Fisher.s.P<cutoff,"Locus.ID"]
la.sig<-la[la$Fisher.s.P<cutoff,"Locus.ID"]
al.sig<-al[al$Fisher.s.P<cutoff,"Locus.ID"]
fl.sig<-fl[fl$Fisher.s.P<cutoff,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
#are they using the same SNPs or different SNPs?
stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
for(i in 1:nrow(fw.shared.chr)){
tx.bp<-fwsw.tx[tx$Fisher.s.P<cutoff & tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
la.bp<-fwsw.la[la$Fisher.s.P<cutoff & la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
al.bp<-fwsw.al[al$Fisher.s.P<cutoff & al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
fl.bp<-fwsw.fl[fl$Fisher.s.P<cutoff & fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
}
colnames(stacks.sig)<-c("TX","LA","AL","FL")
stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
return(stacks.sig)
}
#' extract the significant regions from the gff file
sig.region.ann<-function(fw.shared.chr,gff)
{
fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
if(nrow(this.reg) == 0){
if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region="beyond.last.contig", description=NA,SSCID=NA)
}else{
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=NA,description=NA,SSCID=NA)
}
}else{
if(length(grep("SSCG\\d+",this.reg$attribute))>0){
geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
}else{
geneID<-NA
gene<-NA
}
new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
}
return(as.data.frame(new))
}))
}
#' extract the significant regions from the gff file
put.reg<-do.call(rbind,apply(put.genes,1,function(gene){
ssc.gene<-as.character(gene[[6]])
#there might be multiple matches
ssc.genes<-unlist(strsplit(ssc.gene,","))
#for each gene, match to gff
gene.info<-do.call(rbind,lapply(ssc.genes,function(genename){
this.gff<-gff[grep(genename,gff$attribute),]
chrom<-unique(as.character(this.gff$seqname))
start<-min(this.gff$start)
stop<-max(this.gff$end)
return(c(chrom,start,stop))
}))
if(as.character(gene[[3]]) == "") { gene[[3]]<-NA }
new<-data.frame(Gene=rep(gene[[1]],length(gene.info[,1])),
Function=rep(gene[[2]],length(gene.info[,1])),
Chromsome=rep(gene[[3]],length(gene.info[,1])),
Species=rep(gene[[4]],length(gene.info[,1])),
Citation=rep(gene[[5]],length(gene.info[,1])),
Scovelli_geneID=rep(gene[[6]],length(gene.info[,1])),
Notes=rep(gene[[7]],length(gene.info[,1])),
Chrom=gene.info[,1],StartBP=gene.info[,2],StopBP=gene.info[,3],
stringsAsFactors = FALSE)
return(as.data.frame(new))
}))
write.table(put.reg,"putative.gene.regions.tsv",col.names=T,sep='\t')
Note that some genes will have multiple rows as they match multiple locations in the genome.
put.reg$rad.loci<-apply(put.reg,1,function(gene){
rad<-vcf[vcf$`#CHROM` %in% gene[["Chrom"]] &
vcf$POS >= as.numeric(as.character(gene[["StartBP"]])) &
vcf$POS <= as.numeric(as.character(gene[["StopBP"]])),]
if(nrow(rad)>0){ rad.snps<-paste(rad$SNP,sep=",",collapse=",") }
else { rad.snps<-NA }
return(rad.snps)
})
About 48.1818182% of the putative genes have RAD loci within them - but only about 22.037037% of all of the annotations have RAD loci.
\(D'\) was calculated using a custom C++ program (found at https://github.com/spflanagan/SCA/tree/master/programs/pairwise_ld_vcf) using the formula:
\[D' = \sum_{i=1}^m\sum_{j=1}^n \frac{p_iq_j|D_{ij}|}{D_{max}},\] where \(m\) is the number of alleles at locus and \(n\) is the number of alleles at locus B. \(D_{ij}\) was calculated as \(f_{ij}-p_iq_j\), where \(f_{ij}\) is the frequency of the \(A_iB_j\) haplotype, \(p_i\) is the frequency of allele \(i\), and \(q_j\) is the frequency of allele \(j\). \(D_{max}\) is the lesser of \(p_iq_j\) or \((1-p_i)(1-q_j)\) when \(D_{ij} \lt 0\) and is the minimum of \((1-p_i)q_i\) or \(p_i(1-q_i)\) when \(D_{ij} \gt 0\).
In the data.frame ld, the locus IDs (LocIDA and LocIDB) are the positions on the chromosome.
ld$pos.diff<-abs(ld$LocIDA - ld$LocIDB)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)
#' Find outliers in a putative gene region
outlier.in.region<-function(outlier.df, region.df, oChrom="Chrom", oBP="BP",
rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
out.in<-apply(region.df,1,function(put.gene){
oin<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]]
& outlier.df[[oBP]] >= as.numeric(as.character(put.gene[[rBPStart]]))
& outlier.df[[oBP]] <= as.numeric(as.character(put.gene[[rBPStop]])),"SNP"]
if(length(oin)==0){ oin<-NA }
return(paste(oin,sep=",",collapse=","))
})
return(out.in)
}
outlier.nearby<-function(outlier.df,region.df,ld.df,
oChrom="Chrom", oBP="BP",
rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
out.nearby<-apply(region.df,1,function(put.gene){
nearby<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]]
& outlier.df[[oBP]] >= (as.numeric(as.character(put.gene[[rBPStart]])) - (ld.df[put.gene[[rChrom]] ]))
& outlier.df[[oBP]] <= (as.numeric(as.character(put.gene[[rBPStop]])) + (ld.df[put.gene[[rChrom]] ])),"SNP"]
if(length(nearby)==0){ nearby<-NA }
return(paste(nearby,sep=",",collapse=","))
})
}
#' Get the plotting positions for the putative genes
all.chr<-data.frame(Chr=vcf$`#CHROM`,BP=vcf$POS,stringsAsFactors = F)
bounds<-data.frame(levels(as.factor(all.chr$Chr)),tapply(as.numeric(as.character(all.chr$BP)),all.chr$Chr,max))
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chrom]
#convert put.reg to the plotting BP
new.dat<-data.frame(stringsAsFactors = F)
last.max<-0
for(i in 1:length(plot.scaffs)){
#pull out the data for this scaffold
if(nrow(bounds[bounds$Chrom %in% plot.scaffs[i],])>0){ #sanity check
chrom.dat<-put.reg[put.reg$Chrom %in% plot.scaffs[i],]
if(nrow(chrom.dat)>0){
chrom.dat$plot.min<-as.numeric(as.character(chrom.dat$StartBP))+last.max
chrom.dat$plot.max<-as.numeric(as.character(chrom.dat$StopBP))+last.max
new.dat<-rbind(new.dat,chrom.dat)
#last.max<-max(chrom.dat$plot.pos)+
# as.numeric(scaffold.widths[scaffold.widths[,1] %in% scaffs.to.plot[i],2])
}
last.max<-last.max+
as.numeric(bounds[bounds$Chrom %in% plot.scaffs[i],2])
}else{
print(paste(plot.scaffs[i], "has no designated width."))
}
}
#make sure everything is the correct class
new.dat$plot.min<-as.numeric(as.character(new.dat$plot.min))
new.dat$plot.max<-as.numeric(as.character(new.dat$plot.max))
Are outliers in putative gene regions?
fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
all.put.ssc<-as.character(put.reg$Scovelli_geneID)
all.put.ssc<-unlist(lapply(all.put.ssc,strsplit,","))
sig.put.match<-all.put.ssc[all.put.ssc %in% fw.sig.reg$SSCID]
print(dim(sig.put.match))
## NULL
#or use the functions
fw.sig.reg$SNP<-paste(fw.sig.reg$Chr,as.character(fw.sig.reg$BP),sep=".")
stacks.in<-outlier.in.region(fw.sig.reg,put.reg,oChrom="Chr")
Of the 54 shared outlier SNPs, 27 are in coding regions of the genome. However, 2 shared outliers are in putative gene regions.
No, they don’t overlap. Maybe they’re in the same LD region?
put.nearby.rad<-outlier.nearby(fw.sig.reg,put.reg,chrom.ld,oChrom = "Chr")
#add the nearby significant rad loci to the put.reg data.frame
put.reg$nearby.rad<-put.nearby.rad
head(put.reg[put.reg$nearby.rad!="NA",])
## Gene
## 1 ABCB7
## 2 ABLIM3
## 3 ADAM19
## 4 ADAM19
## 6 ADRB2
## 7 ADRB2
## Function
## 1 iron metabolism
## 2 actin-binding
## 3 osteoblast differentiation
## 4 osteoblast differentiation
## 6 bone density and mineralization
## 7 bone density and mineralization, tooth organogenesis, craniofacial development
## Chromsome Species Citation
## 1 <NA> Gasterosteus aculeatus Jones et al 2012b
## 2 <NA> Gasterosteus aculeatus Hohenlohe et al 2010
## 3 IV Gasterosteus aculeatus Hohenlohe et al 2010
## 4 IV Gasterosteus aculeatus Hohenlohe et al 2010
## 6 VII Gasterosteus aculeatus Hohenlohe et al 2010
## 7 IV Gasterosteus aculeatus Hohenlohe et al 2010
## Scovelli_geneID Notes Chrom StartBP StopBP rad.loci
## 1 SSCG00000000949 <NA> LG14 5689975 5711169 <NA>
## 2 SSCG00000004181 <NA> LG14 12122996 12144915 <NA>
## 3 SSCG00000004162,SSCG00000007962 <NA> LG14 11860301 11877333 <NA>
## 4 SSCG00000004162,SSCG00000007962 <NA> LG14 22306738 22325075 <NA>
## 6 SSCG00000004169 <NA> LG14 12061583 12065881 <NA>
## 7 SSCG00000004169 <NA> LG14 12061583 12065881 <NA>
## nearby.rad
## 1 LG14.3289190,LG14.3352883,LG14.3960463,LG14.5029044
## 2 LG14.5029044,LG14.13689654,LG14.18295431
## 3 LG14.5029044,LG14.13689654,LG14.18295431
## 4 LG14.18295431
## 6 LG14.5029044,LG14.13689654,LG14.18295431
## 7 LG14.5029044,LG14.13689654,LG14.18295431
So of the 540 gene annotations 229 are within the mean LD range of an outlier, which represent 67 putative freshwater genes. Those genes are: ABCB7, ABLIM3, ADAM19, ADRB2, AFAP1L1, angiotensin II receptor, ANXA6, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, AVPR2, BGN, CA, CAII, CAMKK1, CD63, CEBPD, CFTR, cortisol, ECaC, EDA, EFNB1, FBLN1, FERH1, FZD2, HRH2, IGFBP5, IGFBP6, IGF-I, IL12B, KCNH4, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NBC1, NCX, NHE, NKCC, NR4A1, PDLIM7, PMCA, PODXL, PRKCD, PRL, RDH10, RHOA, RHOGTP8, rtCR2, SCUBE1, SLC26A3, SLC2A13, SPG1, SYNPO, TBX2, TRIM14, UT, VATPase, WNT5A, WNT7B, ZEB1
Now for \(\delta\) -divergence
#in the gene
sdd.in<-outlier.in.region(dd.bp.smooth[[1]],put.reg,oBP = "Pos")
put.reg$sdd.in<-sdd.in
The genes ARHGEF3, OSBPL8, RHOGTP8 contain \(\delta\)-divergence outlier regions.
#in the gene
sdd.nb<-outlier.nearby(dd.bp.smooth[[1]],put.reg,chrom.ld,oBP = "Pos")
put.reg$sdd.nb<-sdd.nb
And of the 154 \(\delta\)-divergence outliers, 91.4814815% are in the LD region around putative freshwater genes.
#taken directly from fwsw_analysis.R
bf<-read.delim("bayenv/p4.bf.txt",header=T)
bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
temp.bf.sig<-bf[bf$Temp_BF>bf.co["Temp_BF"],c(1,2,4,8,5,9)]
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
grass.bf.sig<-bf[bf$seagrass_BF>bf.co["seagrass_BF"],c(1,2,4,8,7,9)]
#get the log transformed Bayes Factors
bf$logSal<-log(bf$Salinity_BF)
bf$logTemp<-log(bf$Temp_BF)
bf$logSeagrass<-log(bf$seagrass_BF)
There are 0 overlapping outliers between temperature-, salinity-, and seagrass-associated loci.
But if we only care about salinity ones, there are 91 outliers.
Are any of the Bayenv salinity outliers near the putative freshwater genes?
bfs.in<-outlier.in.region(sal.bf.sig,put.reg,"scaffold")
bfs.nb<-outlier.nearby(sal.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.3703704% are in putative freshwater genes and 75.5555556% are within the LD neighborhood.
Are any of the Bayenv temperature outliers near putative freshwater genes?
bft.in<-outlier.in.region(temp.bf.sig,put.reg,"scaffold")
bft.nb<-outlier.nearby(temp.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.5555556% are in putative freshwater genes and 80.1851852% are within the LD neighborhood.
Are any of the loci associated with seagrass density in or near putative freshwater genes?
bfg.in<-outlier.in.region(grass.bf.sig,put.reg,"scaffold")
bfg.nb<-outlier.nearby(grass.bf.sig,put.reg,chrom.ld,"scaffold")
Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.1851852% are in putative freshwater genes and 77.962963% are within the LD neighborhood.
put.reg$bfs.in<-bfs.in
put.reg$bft.in<-bft.in
put.reg$bfg.in<-bfg.in
unique(put.reg[put.reg$bfs.in !="NA","Gene"])
## [1] ARHGEF3 NHE
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bft.in !="NA","Gene"])
## [1] APOL NHE TRIM14
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bfg.in !="NA","Gene"])
## [1] ARHGEF3
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
All three BayEnv tests identified outliers in the genes . The temperature and grass also share NA, and temperature and salinity share NHE.
Jost’s D measures the fraction of allelic differentiation between populations. According to Molecular Ecologist, “if allelic differentiation at a particular locus is the value of interest, it appears that D is the best measure”, so it might be useful to look at Jost’s D in the putatively freshwater genes.
jost.in<-outlier.in.region(jd.bp.smooth[[1]],put.reg,oChrom="Chrom",oBP="Pos")
jost.nb<-outlier.nearby(jd.bp.smooth[[1]],put.reg,chrom.ld,oChrom = "Chrom",oBP="pos")
put.reg$jostd.nb<-jost.nb
put.reg$jostd.in<-jost.in
So 1 loci are located within putative freshwater genes, but 0 are within the LD region of the putative genes, representing 0 of the putative gene annotations and 0 genes
*Genes containing shared Fst outliers: NA, AE, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, BMI1, CA, CA4, CAII, CAMKK1, CLINT1, cortisol, EFNB1, FBLN1, FLT4, IGFBP2, IGFBP5, IGFBP6, IL12B, KCNH4, Kir, Kir2.2, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NCX, NHE, OSBPL8, PDLIM7, PRKCD, PRL, RHOA, RHOGTP8, ROBO1, rtCR1, SCUBE1, SLC2A13, SOD3, SPG1, STAT3, SYNPO, TIMP3, TNS1, TRIM14, TSHB, VATPase, ZEB1
*Genes containing \(\delta\) -divergence outliers: ARHGEF3, OSBPL8, RHOGTP8
*Genes containing Salinity-associated outliers: ARHGEF3, NHE
*Genes containing Jost D outliers: NHE
*Overlap: ARHGEF3
*Divergence analyses:
*Near divergence analyses: NHE
But looking at put.reg[put.reg$Gene =="RHOGTP8","rad.loci"], it’s obvious that Rho GTPase-activating proteins are distributed throughout the genome (on chromosomes LG11, LG15, LG20, LG14, LG1, scaffold_1578, LG12, LG9, LG8, LG22, LG18, LG5, scaffold_1008, LG6, scaffold_950, LG10, LG7, scaffold_139, LG21, LG16, LG19).
`
Find genomic regions with high \(\pi\) and low \(\delta\)-divergence
#Do high pi and het have low deltad?
#do it per chrom
balancing.sel<-function(pi.out,pi,ht.out,ht,
dd.out,dd,jd.out,jd){
pi.upp<-pi.out[pi.out$direction=="upper",]
ht.upp<-ht.out[ht.out$direction=="upper",]
dd.low<-dd.out[dd.out$direction=="lower",]
jd.low<-jd.out[jd.out$direction=="lower",]
shared.div<-pi.upp[pi.upp[,pi] %in% ht.upp[,ht],]
shared.dif<-dd.low[dd.low[,dd] %in% jd.low[,jd],]
bal<-shared.div[shared.div[,pi] %in% shared.dif[,dd]]
return(list(div=shared.div,dif=shared.dif,bal=bal))
}
bal<-balancing.sel(pi.pp.smooth[[1]],"plot.pos",ht.pp.smooth[[1]],"plot.pos",
dd.pp.smooth[[1]],"plot.pos",jd.pp.smooth[[1]],"plot.pos")
shared.upp<-bal$div
There are loci with high \(\pi\) and low \(\delta\)-divergence on 13 chromosomes.
Instead of the lumped marine-freshwater \(F_{ST}\) values that I used originally, I’m going to plot the average of pairwise \(F_{ST}\) values.
fst.means<-fwsw.al
for(i in 1:nrow(fst.means)){
if(fst.means$Locus.ID[i] %in% fwsw.fl$Locus.ID &
fst.means$Locus.ID[i] %in% fwsw.la$Locus.ID &
fst.means$Locus.ID[i] %in% fwsw.tx$Locus.ID){
mu<-mean(fwsw.fl$Corrected.AMOVA.Fst[fwsw.fl$Locus.ID==fst.means$Locus.ID[i]],
fwsw.la$Corrected.AMOVA.Fst[fwsw.la$Locus.ID==fst.means$Locus.ID[i]],
fwsw.al$Corrected.AMOVA.Fst[fwsw.al$Locus.ID==fst.means$Locus.ID[i]],
fwsw.tx$Corrected.AMOVA.Fst[fwsw.tx$Locus.ID==fst.means$Locus.ID[i]])
fst.means$Corrected.AMOVA.Fst[i]<-mu
}
else{
fst.means$Corrected.AMOVA.Fst[i]<-NA
}
}
fst.means<-fst.means[!is.na(fst.means$Corrected.AMOVA.Fst),]
fwsw<-fst.means
fst.points<-FALSE
#and putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
#put.reg<-read.delim("putative.gene.regions.tsv",header=T)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)
#select genes of interest
#fav.genes<-c("AQP3","TNS1","CAMKK1","mucin","CAII","NAKATPase","ARHGEF3")
#fav.genes<-c("TNS1","CAII","TRIM14","VATPase")
fav.genes<-c("ARHGEF3", "mucin", "NHE", "TAAR")
genes2plot<-put.reg[put.reg$Gene %in% fav.genes,]
#shared Fst outliers
fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
h.pi.name<-"HandPi_subgenes.png"
row.settings<-c(4,4)
chroms2plot<-unique(shared.upp$Chr)
#generate xlims
xlims<-lapply(chroms2plot,function(lg,ld,genes,vcf){
xs<-c(min(genes[genes$Chr == lg,"plot.pos"]-ld[lg]),
max(genes[genes$Chr == lg,"plot.pos"]+ld[lg]))
if(xs[1] < 0){
xs[1]<-0
}
if(xs[2] > max(vcf$POS[vcf$`#CHROM`==lg])){
xs[2]<-max(vcf$POS[vcf$`#CHROM`==lg])
}
return(xs)
},ld=chrom.ld,genes=shared.upp,vcf=vcf)
#or do full chroms
#generate xlims
xlims<-lapply(chroms2plot,function(lg,genes,vcf){
xs<-c(min(vcf$POS[vcf$`#CHROM`==lg]),
max(vcf$POS[vcf$`#CHROM`==lg]))
return(xs)
},genes=shared.upp,vcf=vcf)
names(xlims)<-chroms2plot
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
near.pos<-t(apply(stat.df,1,function(df,target){
x<-as.numeric(df[s.pos])
pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
if(pos==x){
stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
}else{
if(pos>x){
upp<-which.min(abs(x-target[,t.pos]))
low<-upp-1
}else{
low<-which.min(abs(x-target[,t.pos]))
upp<-low+1
}
upp.pt<-target[upp,c(t.pos,t.stat)]
low.pt<-target[low,c(t.pos,t.stat)]
slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
stat<-slope*x+b
}
return(cbind(x,stat))
},target=target))
return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
#smooth== smooth[[1]]
#target== smooth[[2]]
upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
target=target[target[,"chr"]%in% chrom,],
t.pos="pos",t.stat="smoothed.stats")
low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
target=target[target[,"chr"]%in% chrom,],
t.pos="pos",t.stat="smoothed.stats")
points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
this.xlim<-xlims[[as.character(chroms2plot[i])]]
plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
#the shared peaks
p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
function(pos){
points(y=c(-0.2,0.5),x=c(pos,pos),
type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
})
points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
#putative gene regions
g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] &
genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
if(nrow(g) > 0){
rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
}
#Fst
if(fst.points==TRUE){
points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
}
#Pi
points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",lwd=2,col=comp.col["pi"])
upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["pi"],stat="Pi",pos.name="Pos")
#Het
points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["Het"],lwd=2)
upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["Het"],stat="Het",pos.name="Pos")
#deltad
points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["deltad"],lwd=2)
upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["deltad"],stat="deltad",pos.name="Pos")
#Josts D
points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["D"],lwd=2)
upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["D"],stat="D",pos.name="Pos")
#shared Fst outliers
points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
this.df$Corrected.AMOVA.Fst[this.df$BP %in% fw.sig.reg$BP],
pch=8,cex=2,col="orchid4",lwd=3)
if(i == 1){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
txt.locs[3,"starts"]<-txt.locs[3,"starts"]-500000
txt.locs[4,"starts"]<-txt.locs[4,"starts"]+500000
txt.locs[6,"starts"]<-txt.locs[6,"starts"]-500000
txt.locs[7,"starts"]<-txt.locs[7,"starts"]+500000
txt.locs<-txt.locs[-5,]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
if(i == 4){
if(nrow(g)>0){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
txt.locs<-txt.locs[-4,]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
}
if(!i %in% c(1,4)){
if(nrow(g)>0){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
}
#axes etc
axis(1,pos=-0.2,c(xmin,xmax),
labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
legend=c("Putative Gene",
expression(Shared~italic(F)[ST]~Outlier),
expression(Large~pi~and~italic(H))),
bty='n',pch=c(15,8,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
legend=c(expression(italic(H)),
expression(pi),#expression(FW-SW~italic(F)[ST]),
expression("Jost's"~italic(D)),
expression(delta~-divergence)),
bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
comp.col[4:5]))
dev.off()
## png
## 2
Figure 6. Low -divergence and high
upp.pi<-NULL
low.pi<-NULL
upp.het<-NULL
low.het<-NULL
upp.dd<-NULL
low.dd<-NULL
for(i in 1:length(lgs)){
upp.pi<-rbind(upp.pi,avg.pi.adj[avg.pi.adj$Avg.Stat >=
quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.95) &
avg.pi.adj$Chr %in% lgs[i],])
low.pi<-rbind(low.pi,avg.pi.adj[avg.pi.adj$Avg.Stat <=
quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.05) &
avg.pi.adj$Chr %in% lgs[i],])
upp.het<-rbind(upp.het,avg.het.adj[avg.het.adj$Avg.Stat >=
quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.95) &
avg.het.adj$Chr %in% lgs[i],])
low.het<-rbind(low.het,avg.het.adj[avg.het.adj$Avg.Stat <=
quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.05) &
avg.het.adj$Chr %in% lgs[i],])
}
shared.low<-low.pi[low.pi$plot.pos %in% low.het$plot.pos,]
shared.low$plot.min<-shared.low$Avg.Pos-250000
shared.low$plot.max<-shared.low$Avg.Pos+250000
h.pi.name<-"lowdiv_subgenes.png"
row.settings<-c(4,3)
chroms2plot<-unique(shared.low$Chr)
shared.upp<-shared.low
fst.points<-FALSE
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
near.pos<-t(apply(stat.df,1,function(df,target){
x<-as.numeric(df[s.pos])
pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
if(pos==x){
stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
}else{
if(pos>x){
upp<-which.min(abs(x-target[,t.pos]))
low<-upp-1
}else{
low<-which.min(abs(x-target[,t.pos]))
upp<-low+1
}
upp.pt<-target[upp,c(t.pos,t.stat)]
low.pt<-target[low,c(t.pos,t.stat)]
slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
stat<-slope*x+b
}
return(cbind(x,stat))
},target=target))
return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
#smooth== smooth[[1]]
#target== smooth[[2]]
upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
target=target[target[,"chr"]%in% chrom,],
t.pos="pos",t.stat="smoothed.stats")
low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
target=target[target[,"chr"]%in% chrom,],
t.pos="pos",t.stat="smoothed.stats")
points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
this.xlim<-xlims[[as.character(chroms2plot[i])]]
plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
#the shared peaks
p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
function(pos){
points(y=c(-0.2,0.5),x=c(pos,pos),
type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
})
points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
#putative gene regions
g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] &
genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
if(nrow(g) > 0){
rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
}
#Fst
if(fst.points==TRUE){
points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
}
#Pi
points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",lwd=2,col=comp.col["pi"])
upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["pi"],stat="Pi",pos.name="Pos")
#Het
points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["Het"],lwd=2)
upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["Het"],stat="Het",pos.name="Pos")
#deltad
points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["deltad"],lwd=2)
upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["deltad"],stat="deltad",pos.name="Pos")
#Josts D
points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
type="l",col=comp.col["D"],lwd=2)
upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
color=comp.col["D"],stat="D",pos.name="Pos")
#shared Fst outliers
points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
this.df$Corrected.AMOVA.Fst[this.df$BP %in% fw.sig.reg$BP],
pch=8,cex=2,col="orchid4",lwd=3)
if(i == 1){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
txt.locs[3,"starts"]<-txt.locs[3,"starts"]-500000
txt.locs[4,"starts"]<-txt.locs[4,"starts"]+500000
txt.locs[6,"starts"]<-txt.locs[6,"starts"]-500000
txt.locs[7,"starts"]<-txt.locs[7,"starts"]+500000
txt.locs<-txt.locs[-5,]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
if(i == 4){
if(nrow(g)>0){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
txt.locs<-txt.locs[-4,]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
}
if(!i %in% c(1,4)){
if(nrow(g)>0){
txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
txt.locs<-txt.locs[order(txt.locs$starts),]
text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
}
}
#axes etc
axis(1,pos=-0.2,c(xmin,xmax),
labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
legend=c("Putative Gene",
expression(Shared~italic(F)[ST]~Outlier),
expression(Large~pi~and~italic(H))),
bty='n',pch=c(15,8,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
legend=c(expression(italic(H)),
expression(pi),#expression(FW-SW~italic(F)[ST]),
expression("Jost's"~italic(D)),
expression(delta~-divergence)),
bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
comp.col[4:5]))
dev.off()
## png
## 2
Low pi and low het
To look for selective sweeps, I’m going to use the FLK R code by Bonhomme et al, downloaded from https://qgsp.jouy.inra.fr/index.php?option=com_content&view=article&id=50&Itemid=55
The mandatory input is a matrix of allele frequencies for each population.
calc.allFreqs<-function(vcf,pop.list, pop.labs){
allFreqs<-do.call(rbind,lapply(pop.list, function(pop){
this.vcf<-cbind(vcf[,1:9],vcf$SNP,vcf[,grep(pop,colnames(vcf))])
afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
return(afs$RefFreq)
}))
colnames(allFreqs)<-vcf$SNP
rownames(allFreqs)<-pop.labs
return(allFreqs)
}
freqs<-calc.allFreqs(remove.missing.data(vcf, pop.list),pop.list,pop.labs)
write.table(freqs,"flk.freqs",row.names = TRUE)
source("../scripts/FLK.R")
library(ape); library(phangorn)
freqs<-read.table("flk.freqs",row.names=1)
## estimate the population tree using Reynolds distances
## computed on the SNP data, without specifying an outgroup
Fsnp<-Fij(freqs)
## Now compute the FLK and LK tests
tests<-FLK(freqs,Fsnp)
Then I run the programs/FLKnull.py program on the output to estimate the empirical null distribution. Once that has run, we use the following code provided by Bonhomme et al to plot the null distribution
null<-read.table('envelope.txt',head=T)
plot(tests$Ht,tests$F.LK,pch=19,col="gray",xlab='Heterozygosity',ylab='FLK statistic',xlim=c(0,0.5))
lines(null$Ht,null$q0.005,type='l',col='navy')
lines(null$Ht,null$q0.995,type='l',col='navy')
lines(null$Ht,null$q0.025,type='l',col='navy')
lines(null$Ht,null$q0.975,type='l',col='navy')
lines(null$Ht,null$q0.5,type='l',col='navy')
lines(smooth.spline(null$Ht,null$q0.995))
lines(smooth.spline(null$Ht,null$q0.005))
lines(smooth.spline(null$Ht,null$q0.025),lty=2)
lines(smooth.spline(null$Ht,null$q0.975),lty=2)
lines(smooth.spline(null$Ht,null$q0.5),lty=3)
Use PopGenome to do the CLR method. First I need to convert my vcf file to a tabix-ed vcf file, using bgzip p4.upd.vcf followed by tabix -p vcf p4.upd.vcf.gz, and saved these in the popgenome/ directory.
mins<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,min)[scaffs]
maxs<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,max)[scaffs]
snps<-tapply(X = vcf$POS,INDEX = vcf$`#CHROM`,length)[scaffs]
library(PopGenome)
GENOME.class <- readVCF("popgenome/p4.upd.vcf.gz",numcols=snps["LG1"],tid="LG1",
frompos=mins["LG1"],topos=maxs["LG1"],
include.unknown = TRUE)
TXSP<-grep("TXSP",colnames(vcf),value = TRUE)
TXCC<-grep("TXCC",colnames(vcf),value = TRUE)
TXFW<-grep("TXFW",colnames(vcf),value = TRUE)
TXCB<-grep("TXCB",colnames(vcf),value = TRUE)
LAFW<-grep("LAFW",colnames(vcf),value = TRUE)
ALST<-grep("ALST",colnames(vcf),value = TRUE)
ALFW<-grep("ALFW",colnames(vcf),value = TRUE)
FLSG<-grep("FLSG",colnames(vcf),value = TRUE)
FLKB<-grep("FLKB",colnames(vcf),value = TRUE)
FLFD<-grep("FLFD",colnames(vcf),value = TRUE)
FLSI<-grep("FLSI",colnames(vcf),value = TRUE)
FLAB<-grep("FLAB",colnames(vcf),value = TRUE)
FLPB<-grep("FLPB",colnames(vcf),value = TRUE)
FLHB<-grep("FLHB",colnames(vcf),value = TRUE)
FLCC<-grep("FLCC",colnames(vcf),value = TRUE)
FLFW<-grep("FLFW",colnames(vcf),value = TRUE)
genvcf<-set.populations(genvcf,
list(TXSP,TXCC,TXFW,TXCB,LAFW,ALST,ALFW,FLSG,FLKB,FLFD,FLSI,FLAB,FLPB,FLHB,FLCC,FLFW),
diploid=TRUE)
Convert to sweepfinder2 format:
#one file for each chromosome
#first column is position
#second column is count of derived alleles
#third column is the total number of observed alleles
#fourth column indicates whether it's derived or ancestral.
vcf2sfaf<-function(vcf,lgs){
chrs<-lapply(lgs,function(lg){
gts<-extract.gt.vcf(vcf[vcf$`#CHROM` == lg,])
sf.af<-do.call(rbind,apply(gts,1,function(row){
gt<-row[10:length(row)]
ref<-(length(gt[gt=="0/0"])*2)+
length(gt[gt=="0/1"])+length(gt[gt=="1/0"])
alt<-(length(gt[gt=="1/1"])*2)+
length(gt[gt=="0/1"])+length(gt[gt=="1/0"])
return(data.frame(position=row["POS"],x=ref,n=alt))
}))
write.table(sf.af,paste("SF2/",lg,".AF.txt",sep=""),
col.names = TRUE,row.names=FALSE,sep='\t',
quote=FALSE,eol='\n')
print(paste("Writing to file: SF2/",lg,".AF.txt",sep=""))
return(sf.af)
})
}
af<-vcf2sfaf(vcf,lgs)
Make separate files
header<-"##fileformat=VCFv4.2"
for(i in 1:length(lgs)){
write.table(header,paste(lgs[i],".vcf",sep=""),quote=FALSE,
col.names=FALSE,row.names=FALSE )
write.table(vcf[vcf$`#CHROM`==lgs[i],-708],paste(lgs[i],".vcf",sep=""),
col.names=TRUE,row.names=FALSE,quote=FALSE,sep='\t',append = TRUE)
}
I’m highlighting a few of the putative genes that have a bunch of outliers nearby or in them. First is TNS1, which matched three genome annotations but only had one region, on LG1, that contained shared outlier Fsts, delta divergence, Bayenv salinity, were near monophyletic neighbor joining trees.
fst.dfs<-list(fwsw.tx,fwsw.la,fwsw.al,fwsw.fl)
names(fst.dfs)<-c("TX FWSW","LA FWSW","AL FWSW","FL FWSW")
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)
The plotting function gene.region.plot accpets a number of parameters.
gene.region.plot<-function(chrom,gene,put.reg,vcf,chrom.ld, fst.dfs,deltad=FALSE,D=FALSE,
colors="black",lwds=2,alphas=0.5,ltys=1,legend=TRUE,bases="bp",
smooth=FALSE, smooth.loess=TRUE,fst.name="Corrected.AMOVA.FST",txt.cex=1,y.lim=c(0,1),...){
pstart<-min(as.numeric(as.character(put.reg[put.reg$Gene ==gene &
put.reg$Chrom==chrom,"StartBP"])))-(chrom.ld[chrom]/2)
pstop<-max(as.numeric(as.character(put.reg[put.reg$Gene ==gene &
put.reg$Chrom==chrom,"StartBP"])))+(chrom.ld[chrom]/2)
if(pstart < 0){ pstart<- 0 }
if(pstop > max(vcf[vcf$`#CHROM`==chrom,"POS"])){ pstop <- max(vcf[vcf$`#CHROM`==chrom,"POS"]) }
starts<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StartBP"])
stops<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StopBP"])
if(smooth==TRUE){
#generate the dataframes
smooth.fsts<-lapply(fst.dfs,function(df){
this.df<-df[df$Chr %in% chrom,]
this.smooth<-do.call("rbind",lapply(seq(1,nrow(this.df),(nrow(this.df)*0.15)/5),sliding.avg,
dat=data.frame(Pos=this.df$BP,Fst=this.df[,fst.name]),
width=nrow(this.df)*0.15))
return(this.smooth)
})
} else{
smooth.fsts<-lapply(fst.dfs,function(df){
this.smooth<-df[df$Chr %in% chrom,c("BP",fst.name)]
})
}
if(is.data.frame(deltad)){
this.dd<-deltad[deltad$Chrom %in% chrom,]
if(smooth.loess==TRUE){
dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
names(dd.smooth)<-c("pos","smoothed.stats")
}else{
dd.smooth<-this.dd
}
}else if(is.list(deltad)){
dd.smooth<-deltad[[2]][deltad[[2]][,"chr"]%in%chrom,]
}else{
dd.smooth<-NULL
}
if(is.data.frame(D)){
this.d<-D[D$Chr %in% chrom,]
if(smooth.loess==TRUE) {
dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
names(dd.smooth)<-c("pos","smoothed.stats")
}else{
dsmooth<-this.d
}
}else if(is.list(D)){
dsmooth<-D[[2]][D[[2]][,"chr"]%in%chrom,]
}else{
D<-NULL
}
pr.gene<-put.reg[put.reg$Chrom==chrom & put.reg$Gene==gene,]
fst.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$rad.loci[pr.gene$rad.loci != "NA"]),","))
sal.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfs.in[pr.gene$bfs.in != "NA"]),","))
# tmp.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bft.in[pr.gene$bft.in != "NA"]),","))
#grs.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfg.in[pr.gene$bfg.in != "NA"]),","))
#jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jostd.nb[jostd.nb != "NA"]),","))
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#nj.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nj.nb[pr.gene$nj.nb != "NA"]),",")))
nj.gene<-NULL
nb.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nearby.rad[pr.gene$nearby.rad != "NA"]),",")))
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
if(length(colors) != length(smooth.fsts)){
colors<-rep(colors,length(smooth.fsts)/length(colors))
}
if(length(lwds) != length(smooth.fsts)){
lwds<-rep(lwds,length(smooth.fsts)/length(lwds))
}
if(length(alphas) != length(smooth.fsts)){
alphas<-rep(alphas,length(smooth.fsts)/length(alphas))
}
if(length(ltys) != length(smooth.fsts)){
ltys<-rep(ltys,length(smooth.fsts)/length(ltys))
}
plot(smooth.fsts[[1]],type='n',ylim=c(y.lim[1],y.lim[2]+0.02),
bty="L",xlab="",ylab="",xaxt='n',yaxt='n',xlim=c(pstart,pstop))
axis(2,las=1,cex.axis=txt.cex)
xlabs<-c(pstart,pstop)
if(bases %in% c("mb","MB","Mb")) xlabs<-c(round((pstart/1000000),2),round((pstop/1000000),2))
if(bases %in% c("kb","KB","Kb")) xlabs<-c(round((pstart/1000),2),round((pstop/1000),2))
axis(1,at=c(pstart,pstop),cex.axis=txt.cex,labels = xlabs)
#add putative gene
rect(xleft=as.numeric(starts),xright=as.numeric(stops),
ybottom=y.lim[1]-0.04,ytop=y.lim[2],col="indianred",border="indianred")
#add fsts
mtext(chrom,1,cex=0.75*txt.cex,line = 1)
for(i in 1:length(smooth.fsts)){
points(smooth.fsts[[i]],col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
}
#add delta divergence
if(is.data.frame(dd.smooth)){
points(dd.smooth$pos,dd.smooth$smoothed.stats,col="#dfc27d",type="l",lwd=2)
}
if(is.list(deltad)){
upp.low.pts(smooth=deltad[[1]],target=deltad[[2]],chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos")
}
#add Jost's D
if(is.data.frame(dsmooth)){
points(dsmooth$pos,dsmooth$smoothed.stats,type="l",col="#a6611a",lwd=2)
}
if(is.list(D)){
upp.low.pts(smooth=D[[1]],target=D[[2]],chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos")
}
#are nj trees shown in scope of fig?
njs.eval<-lapply(nj.gene,function(x){
if(x >= pstart & x <= pstop) { eval = TRUE }
else { eval = FALSE }
return(eval)
})
#add delta d
if(is.data.frame(deltad)){
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence))
}else{
lgnd<-unlist(lapply(names(fst.dfs),function(n) { substitute(paste(name,italic(F[ST]),sep=""),list(name=n)) }))
}
cols<-NULL
pchs<-rep(32,length(lwds))
for(i in 1:length(colors)){
cols<-c(cols,alpha(colors[i],alphas[i]))
}
if(!is.null(deltad)){
cols<-c(cols,"#dfc27d")
pchs<-c(pchs,32)
ltys<-c(ltys,1)
lwds<-c(lwds,2)
}
if(!is.null(D)){
cols<-c(cols,"#a6611a")
pchs<-c(pchs,32)
lgnd<-c(lgnd,"Jost's D")
ltys<-c(ltys,1)
lwds<-c(lwds,2)
}
lgnd<-c(lgnd,substitute(italic(g),list(g=gene)),"Outlier RAD loci")
cols<-c(cols,"indianred","black")
pchs<-c(pchs,15,124)
if(length(grep(TRUE,njs.eval))>0){
lgnd<-c(lgnd,"Monophyletic Gene Tree")
cols<-c(cols,"#08519c")
pchs<-c(pchs,124)
}
if(length(sal.gene)>0){
lgnd<-c(lgnd,"Salinity-associated")
cols<-c(cols,"black")
pchs<-c(pchs,25)
}
if(legend==TRUE){
legend("topleft",
legend=lgnd,pch=pchs,
bty='n',lwd=c(lwds,0,0,0,0),lty=c(ltys,0,0,0,0),
col=cols,cex = txt.cex)
}
#add outlier loci
clip(x1=min(as.numeric(pstart)),x2=max(as.numeric(pstop)),y1=y.lim[2]+.01,y2=y.lim[2]+0.2)
abline(v=fst.gene,col="black")
points(x=sal.gene,y=rep(y.lim[2]+0.05,length(sal.gene)),col="black",pch=25,cex=0.75*txt.cex,lwd=2)
abline(v=nb.gene,col=alpha("black",0.5),cex=2)
abline(v=nj.gene,col="#08519c",cex=2)
}
outside.legend<-function(...){
opar <- par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),
mar=c(0, 0, 0, 0), new=TRUE)
on.exit(par(opar))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend(...)
}
#set up legend parameters
leg.text<-c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
expression(delta~-divergence),expression(Shared~italic(F)[ST]~Outliers),
"Salinity-associated SNPs")
leg.pchs<-c(rep(32,5),124,25,124)
leg.ltys<-c(1,1,2,1,1,0,0,0)
leg.cols<-c(colors,"cornflowerblue","black","black","indianred")
png("ARGHEF3.png",height=8,width=10,units="in",res=300)
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
arhgef3<-lapply(unique(put.reg[put.reg$Gene=="ARHGEF3" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
gene.region.plot,gene="ARHGEF3",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(ARHGEF3))),pch=leg.pchs,lty=leg.ltys,lwd=2,
bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png
## 2
png("mucin.png",height=8,width=10,units="in",res=300)
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
mucin<-lapply(unique(put.reg[put.reg$Gene=="mucin"& put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
gene.region.plot,gene="mucin",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(mucin))),pch=leg.pchs,lty=leg.ltys,lwd=2,
bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png
## 2
png("NHE.png",height=4,width=6,units="in",res=300)
par(mfrow=c(2,4),oma=c(2,2,3,2),mar=c(2,2,2,2))
nhe<-lapply(unique(put.reg[put.reg$Gene=="NHE" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
gene.region.plot,gene="NHE",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(NHE))),pch=leg.pchs,lty=leg.ltys,lwd=2,
bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()
## png
## 2
png("TAAR.png",height=4,width=6,units="in",res=300)
par(mfrow=c(1,2),oma=c(2,2,3,2),mar=c(2,2,2,2))
taar<-lapply(unique(put.reg[put.reg$Gene=="TAAR" & put.reg$Chrom%in%lgs,"Chrom"]), #& !is.na(put.reg$rad.loci)
gene.region.plot,gene="TAAR",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(TAAR))),pch=leg.pchs,lty=leg.ltys,lwd=2,
bty='n',ncol=3,cex=.75,x.intersp=0,col=leg.cols)
dev.off()
## png
## 2
Supplmental Fig. 5
Supplemental Fig. 6
Supplemental Fig. 7
Supplemental Fig. 8
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)
png("Fig7candidateGenes.png",height=10,width=10,units="in",res=300)
par(mfrow=c(4,3),oma=c(2,2,4.5,2),mar=c(1,3,2,1))
#row1
gene.region.plot("LG1","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)
gene.region.plot("LG5","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)
gene.region.plot("LG11","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)
#row2
gene.region.plot("LG2","mucin",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(mucin)),xpd=T,cex=2)
gene.region.plot("LG4","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=15000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)
gene.region.plot("LG7","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)
#row3
gene.region.plot("LG7","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=8500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
gene.region.plot("LG13","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=2200000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
gene.region.plot("LG14","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=6500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
#row4
gene.region.plot("LG18","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=5500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
gene.region.plot("LG19","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
gene.region.plot("LG20","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)
outside.legend("top",legend=c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
expression(delta~-divergence),expression("Jost's"~italic(D)),
expression(Shared~italic(F)[ST]~Outliers),
"Salinity-associated SNPs","FW gene"),
pch=c(rep(32,6),124,25,124),lty=c(1,1,2,1,1,1,0,0,0),lwd=2,
bty='n',ncol=5,cex=1.25,x.intersp=0,
col=c(colors,comp.col["deltad"],comp.col["D"],"black","black","indianred"))
#
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1.5, 0:1.5, type="n", xlab="", ylab="", axes=FALSE)
# legend(x=0.5,y=0.22,c(expression(Shared~italic(F)[ST]~Outliers),
# "Salinity-associated SNPs",
# "FW Candidate Gene"),
# pch=c(124,25,124),lty=c(0,0,0),lwd=2,
# col=c("black","black","indianred"),
# bty='n',ncol=1,cex=2,x.intersp=-0.5,xpd=T)
#
#
# #add lines to the top
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
# legend("top",c(expression(TX~FWSW~italic(F)[ST]),
# expression(LA~FWSW~italic(F)[ST]),
# expression(AL~FWSW~italic(F)[ST]),
# expression(FL~FWSW~italic(F)[ST]),
# expression(delta~-divergence)),
# pch=rep(32,5),lty=c(1,1,1,1,1),lwd=2,text.width=0.16,
# col=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
# "#dfc27d"),
# bty='n',ncol=3,cex=2,x.intersp=0.5,y.intersp=0.75,xpd=T)
dev.off()
## png
## 2
Fig. 7
LG4 clearly is enriched for outliers, let’s look at it more closely, including putative FW genes.
#define a few things
chrom<-"LG4"
pchs<-c(rep(32,6),15,124,124)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
"#dfc27d","#a6611a","indianred","black","#08519c")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,0)
#put together the data for LG4
#this.dd<-deltad[deltad$Chrom %in% chrom,]
#dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
#add Jost's D
#this.d<-jostd[jostd$Chr %in% chrom,]
#dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
#get the outliers
pr.gene<-put.reg[put.reg$Chrom==chrom ,]
#rad loci
fst.gene<- stacks.sig[stacks.sig$Chr==chrom,"BP"]
#none of the salinity-associated RAD loci are in or near genes on LG8, but we could plot them all anyway
sal.gene<-sal.bf.sig[sal.bf.sig$scaffold=="LG8","BP"]
#jost's d - none are in the putative genes
jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jost.in[jost.in != "NA"]),","))
#delta divergence outliers- none are in the putative genes
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#put all of them together with their associated shapes
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
#add putative genes
g<-put.reg[put.reg$Chrom %in% chrom,]
g$Gene<-as.character(g$Gene)
g$Gene[grep("ATP6",g$Gene)]<-"ATP6V" #standardize the names
g<-g[,c("Gene","StartBP","StopBP")]
g<-g[!duplicated(g$StartBP),]
g<-g[order(g$StartBP),]
png("LG4.png",width=10,height=7,units="in",res=300)
nf <- layout(matrix(c(1,1,1,1,
2,2,2,2), nrow=2, byrow=TRUE))
### Plot all of LG4
par(oma=c(2.5,3,2,1),mar=c(1.5,1,2,1))
plot(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom],
fst.dfs[[1]]$Smoothed.AMOVA.Fst[fst.dfs[[1]]$Chr %in% chrom],
type='n',ylim=c(0,1),bty="L",xlab="",ylab="",xaxt='n',yaxt='n')
axis(2,las=1,cex.axis=1.75)
axis(1,cex.axis=1.75,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3))
#add fsts
#mtext(chrom,1,cex=2*0.75,line=2.5)
for(i in 1:length(fst.dfs)){
points(fst.dfs[[i]]$BP[fst.dfs[[i]]$Chr %in% chrom],
fst.dfs[[i]]$Smoothed.AMOVA.Fst[fst.dfs[[i]]$Chr %in% chrom],
col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
}
#add delta-d and Jost's D
#Pi
points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
type="l",lwd=2,col=comp.col["pi"])
upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],
chrom=chrom,color=comp.col["pi"],stat="Pi",pos.name="Pos",cex=1.75)
#Het
points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
type="l",col=comp.col["Het"],lwd=2)
upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],
chrom=chrom,color=comp.col["Het"],stat="Het",pos.name="Pos",cex=1.75)
#deltad
points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
type="l",col=comp.col["deltad"],lwd=2)
upp.low.pts(smooth=dd.bp.smooth[[1]],target=dd.bp.smooth[[2]],
chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos",cex=1.75)
#Josts D
points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
type="l",col=comp.col["D"],lwd=2)
upp.low.pts(smooth=jd.bp.smooth[[1]],target=jd.bp.smooth[[2]],
chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos",cex=1.75)
#add the putative genes
starts<-as.numeric(g[,"StartBP"])
stops<-as.numeric(g[,"StopBP"])
rect(xleft=as.numeric(starts),xright=as.numeric(stops),
ybottom=-0.04,ytop=1,col="indianred",border="indianred")
#add the ones that are spaced out
text(x=g$StartBP[!g$Gene %in% c("SCUBE1","TRIM14")],y=0.5,
labels=g$Gene[!g$Gene %in% c("SCUBE1","TRIM14")],font=2,srt=90,xpd=T,cex=1.75)
text(x=g$StartBP[g$Gene == "SCUBE1"]-50000,y=0.8,
labels=g$Gene[g$Gene =="SCUBE1"],font=2,srt=90,xpd=T,cex=1.75)
text(x=g$StartBP[g$Gene == "TRIM14"]+50000,y=0.2,
labels=g$Gene[g$Gene =="TRIM14"],font=2,srt=90,xpd=T,cex=1.75)
#add outlier loci
clip(x1=min(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),
x2=max(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),y1=0.99,y2=1.06)
abline(v=fst.gene,col="black",lwd=1.5)
points(x=sal.gene,y=rep(1.05,length(sal.gene)),col="black",pch=25,cex=1)
### Plot the recombination rate
load("../../scovelli_genome/ssc_map.rda")
rec.cols<-c("#fdbb84","#fc8d59","#ef6548")
plot(set[["LG4"]]@interpolations$slidingwindow@physicalPositions,
set[["LG4"]]@interpolations$slidingwindow@rates,type="l",bty="L",
ylim=c(-5,25),xlab="",ylab="",xaxt='n',yaxt='n',lwd=2,col=rec.cols[1])
abline(h=0,lty=2)
axis(2,cex.axis=1.75,las=1)
axis(1,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3),cex.axis=1.75)
mtext("Position on LG4 (Mb)",1,line=2.2,cex=1.5*0.75)
mtext("Recombination Rate (cM/Mb)",2,line=2.5,cex=1.5*0.75)
lines(set[["LG4"]]@interpolations$spline@physicalPositions,
set[["LG4"]]@interpolations$spline@rates,type="l",col=rec.cols[2],lwd=2)
lines(set[["LG4"]]@interpolations$loess@physicalPositions,
set[["LG4"]]@interpolations$loess@rates,type="l",lwd=2,col=rec.cols[3])
legend("topleft",bty='n',col=rec.cols,
lwd=2,c("Sliding Window","Spline","Loess"),cex=1.5)
### add legend
#create the legend text, and parameters
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence),"Jost's D","gene regions",
expression("Shared"~italic(F[ST])~"outlier"),"Salinity-associated")
pchs<-c(rep(32,6),15,124,25)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
"#dfc27d","#a6611a","indianred","black","black")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,1)
#plot the legend
par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), mar=c(0, 0, 0, 0), new=TRUE,xpd=TRUE)
plot(c(0,1),c(0,1),bty='n',type='n',xlab="",ylab="",xaxt='n',yaxt='n')
legend("top",legend=lgnd,pch=pchs,ncol=5,
bty='n',lwd=lwds,lty=ltys,
col=cols,xpd=TRUE,cex=1.5,
x.intersp = 0.5,y.intersp = 0.75)
dev.off()
## png
## 2
LG 4 Figure